aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/IRQ-affinity.txt37
-rw-r--r--Documentation/cputopology.txt26
-rw-r--r--Documentation/feature-removal-schedule.txt7
-rw-r--r--Documentation/filesystems/ext4.txt125
-rw-r--r--Documentation/filesystems/gfs2-glocks.txt114
-rw-r--r--Documentation/filesystems/proc.txt29
-rw-r--r--Documentation/i2c/busses/i2c-i81047
-rw-r--r--Documentation/i2c/busses/i2c-prosavage23
-rw-r--r--Documentation/i2c/busses/i2c-savage426
-rw-r--r--Documentation/i2c/fault-codes127
-rw-r--r--Documentation/i2c/smbus-protocol4
-rw-r--r--Documentation/i2c/writing-clients51
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/alpha/kernel/irq.c5
-rw-r--r--arch/arm/kernel/stacktrace.c1
-rw-r--r--arch/avr32/kernel/stacktrace.c1
-rw-r--r--arch/mips/kernel/stacktrace.c1
-rw-r--r--arch/mips/sibyte/swarm/Makefile1
-rw-r--r--arch/mips/sibyte/swarm/swarm-i2c.c37
-rw-r--r--arch/powerpc/kernel/stacktrace.c2
-rw-r--r--arch/s390/kernel/stacktrace.c2
-rw-r--r--arch/sh/kernel/stacktrace.c1
-rw-r--r--arch/sparc64/kernel/stacktrace.c2
-rw-r--r--arch/x86/Kconfig22
-rw-r--r--arch/x86/kernel/acpi/sleep.c30
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/stacktrace.c2
-rw-r--r--block/blk-core.c12
-rw-r--r--block/blk-exec.c6
-rw-r--r--drivers/ata/ahci.c331
-rw-r--r--drivers/ata/libata-core.c56
-rw-r--r--drivers/ata/libata-eh.c219
-rw-r--r--drivers/ata/libata-pmp.c13
-rw-r--r--drivers/ata/libata-scsi.c99
-rw-r--r--drivers/ata/libata-sff.c15
-rw-r--r--drivers/ata/libata.h2
-rw-r--r--drivers/ata/pata_bf54x.c6
-rw-r--r--drivers/ata/pata_legacy.c10
-rw-r--r--drivers/ata/pata_qdi.c2
-rw-r--r--drivers/ata/pata_scc.c2
-rw-r--r--drivers/ata/pata_winbond.c2
-rw-r--r--drivers/ata/sata_svw.c38
-rw-r--r--drivers/base/topology.c32
-rw-r--r--drivers/block/paride/pd.c20
-rw-r--r--drivers/clocksource/acpi_pm.c19
-rw-r--r--drivers/gpu/drm/drm_memory.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_cp.c2
-rw-r--r--drivers/i2c/algos/i2c-algo-bit.c4
-rw-r--r--drivers/i2c/algos/i2c-algo-pca.c2
-rw-r--r--drivers/i2c/algos/i2c-algo-pcf.c48
-rw-r--r--drivers/i2c/busses/Kconfig715
-rw-r--r--drivers/i2c/busses/Makefile57
-rw-r--r--drivers/i2c/busses/i2c-ali1535.c38
-rw-r--r--drivers/i2c/busses/i2c-ali1563.c38
-rw-r--r--drivers/i2c/busses/i2c-ali15x3.c32
-rw-r--r--drivers/i2c/busses/i2c-amd756-s4882.c4
-rw-r--r--drivers/i2c/busses/i2c-amd756.c35
-rw-r--r--drivers/i2c/busses/i2c-amd8111.c54
-rw-r--r--drivers/i2c/busses/i2c-au1550.c130
-rw-r--r--drivers/i2c/busses/i2c-cpm.c745
-rw-r--r--drivers/i2c/busses/i2c-davinci.c89
-rw-r--r--drivers/i2c/busses/i2c-elektor.c4
-rw-r--r--drivers/i2c/busses/i2c-gpio.c2
-rw-r--r--drivers/i2c/busses/i2c-hydra.c3
-rw-r--r--drivers/i2c/busses/i2c-i801.c284
-rw-r--r--drivers/i2c/busses/i2c-i810.c260
-rw-r--r--drivers/i2c/busses/i2c-ibm_iic.c206
-rw-r--r--drivers/i2c/busses/i2c-iop3xx.c2
-rw-r--r--drivers/i2c/busses/i2c-isch.c339
-rw-r--r--drivers/i2c/busses/i2c-mpc.c2
-rw-r--r--drivers/i2c/busses/i2c-mv64xxx.c2
-rw-r--r--drivers/i2c/busses/i2c-nforce2-s4985.c257
-rw-r--r--drivers/i2c/busses/i2c-nforce2.c49
-rw-r--r--drivers/i2c/busses/i2c-ocores.c44
-rw-r--r--drivers/i2c/busses/i2c-pasemi.c2
-rw-r--r--drivers/i2c/busses/i2c-pca-platform.c2
-rw-r--r--drivers/i2c/busses/i2c-piix4.c73
-rw-r--r--drivers/i2c/busses/i2c-pmcmsp.c2
-rw-r--r--drivers/i2c/busses/i2c-prosavage.c325
-rw-r--r--drivers/i2c/busses/i2c-pxa.c2
-rw-r--r--drivers/i2c/busses/i2c-s3c2410.c2
-rw-r--r--drivers/i2c/busses/i2c-savage4.c185
-rw-r--r--drivers/i2c/busses/i2c-sibyte.c8
-rw-r--r--drivers/i2c/busses/i2c-sis5595.c29
-rw-r--r--drivers/i2c/busses/i2c-sis630.c59
-rw-r--r--drivers/i2c/busses/i2c-sis96x.c37
-rw-r--r--drivers/i2c/busses/i2c-stub.c6
-rw-r--r--drivers/i2c/busses/i2c-taos-evm.c5
-rw-r--r--drivers/i2c/busses/i2c-via.c5
-rw-r--r--drivers/i2c/busses/i2c-viapro.c31
-rw-r--r--drivers/i2c/busses/i2c-voodoo3.c2
-rw-r--r--drivers/i2c/busses/scx200_acb.c2
-rw-r--r--drivers/i2c/chips/Kconfig43
-rw-r--r--drivers/i2c/chips/Makefile1
-rw-r--r--drivers/i2c/chips/at24.c583
-rw-r--r--drivers/i2c/chips/eeprom.c81
-rw-r--r--drivers/i2c/chips/max6875.c4
-rw-r--r--drivers/i2c/chips/pca9539.c25
-rw-r--r--drivers/i2c/chips/pcf8574.c25
-rw-r--r--drivers/i2c/chips/pcf8591.c33
-rw-r--r--drivers/i2c/i2c-core.c575
-rw-r--r--drivers/i2c/i2c-dev.c12
-rw-r--r--drivers/ide/Kconfig6
-rw-r--r--drivers/ide/Makefile1
-rw-r--r--drivers/ide/arm/palm_bk3710.c3
-rw-r--r--drivers/ide/h8300/ide-h8300.c6
-rw-r--r--drivers/ide/ide-acpi.c6
-rw-r--r--drivers/ide/ide-atapi.c296
-rw-r--r--drivers/ide/ide-cd.c122
-rw-r--r--drivers/ide/ide-cd.h4
-rw-r--r--drivers/ide/ide-cd_ioctl.c113
-rw-r--r--drivers/ide/ide-disk.c13
-rw-r--r--drivers/ide/ide-dma.c4
-rw-r--r--drivers/ide/ide-floppy.c357
-rw-r--r--drivers/ide/ide-io.c114
-rw-r--r--drivers/ide/ide-iops.c38
-rw-r--r--drivers/ide/ide-probe.c10
-rw-r--r--drivers/ide/ide-tape.c561
-rw-r--r--drivers/ide/ide-taskfile.c38
-rw-r--r--drivers/ide/ide-timing.h1
-rw-r--r--drivers/ide/ide.c360
-rw-r--r--drivers/ide/legacy/ali14xx.c2
-rw-r--r--drivers/ide/legacy/gayle.c4
-rw-r--r--drivers/ide/legacy/ht6560b.c2
-rw-r--r--drivers/ide/legacy/qd65xx.c4
-rw-r--r--drivers/ide/pci/aec62xx.c2
-rw-r--r--drivers/ide/pci/alim15x3.c2
-rw-r--r--drivers/ide/pci/amd74xx.c16
-rw-r--r--drivers/ide/pci/cmd640.c8
-rw-r--r--drivers/ide/pci/cmd64x.c4
-rw-r--r--drivers/ide/pci/cy82c693.c2
-rw-r--r--drivers/ide/pci/hpt366.c3
-rw-r--r--drivers/ide/pci/ns87415.c4
-rw-r--r--drivers/ide/pci/scc_pata.c12
-rw-r--r--drivers/ide/pci/sgiioc4.c7
-rw-r--r--drivers/ide/pci/siimage.c25
-rw-r--r--drivers/ide/pci/via82cxxx.c16
-rw-r--r--drivers/ide/ppc/pmac.c6
-rw-r--r--drivers/ide/setup-pci.c4
-rw-r--r--drivers/infiniband/core/addr.c42
-rw-r--r--drivers/infiniband/core/agent.h2
-rw-r--r--drivers/infiniband/core/cache.c2
-rw-r--r--drivers/infiniband/core/cm.c2
-rw-r--r--drivers/infiniband/core/cma.c162
-rw-r--r--drivers/infiniband/core/core_priv.h2
-rw-r--r--drivers/infiniband/core/device.c2
-rw-r--r--drivers/infiniband/core/fmr_pool.c2
-rw-r--r--drivers/infiniband/core/mad_priv.h2
-rw-r--r--drivers/infiniband/core/mad_rmpp.c2
-rw-r--r--drivers/infiniband/core/mad_rmpp.h2
-rw-r--r--drivers/infiniband/core/packer.c2
-rw-r--r--drivers/infiniband/core/sa_query.c24
-rw-r--r--drivers/infiniband/core/sysfs.c122
-rw-r--r--drivers/infiniband/core/ucm.c2
-rw-r--r--drivers/infiniband/core/ud_header.c2
-rw-r--r--drivers/infiniband/core/umem.c2
-rw-r--r--drivers/infiniband/core/user_mad.c2
-rw-r--r--drivers/infiniband/core/uverbs.h2
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c4
-rw-r--r--drivers/infiniband/core/uverbs_main.c2
-rw-r--r--drivers/infiniband/core/verbs.c49
-rw-r--r--drivers/infiniband/hw/amso1100/c2_rnic.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c27
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h5
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h103
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c8
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c15
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c199
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h8
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c261
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c9
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c14
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c10
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.h1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba7220.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c3
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c12
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c3
-rw-r--r--drivers/infiniband/hw/mlx4/main.c7
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c73
-rw-r--r--drivers/infiniband/hw/mthca/mthca_allocator.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_catas.c17
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_config_reg.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_doorbell.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mcg.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_pd.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c32
-rw-r--r--drivers/infiniband/hw/mthca/mthca_reset.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_uar.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_wqe.h2
-rw-r--r--drivers/infiniband/hw/nes/nes.c2
-rw-r--r--drivers/infiniband/hw/nes/nes.h9
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c1
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c68
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c33
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c207
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h48
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c104
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c46
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c52
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c115
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c27
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c69
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c2
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c3
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h2
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c2
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c2
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c2
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c15
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h2
-rw-r--r--drivers/net/cxgb3/cxgb3_ctl_defs.h1
-rw-r--r--drivers/net/cxgb3/cxgb3_offload.c7
-rw-r--r--drivers/net/cxgb3/version.h2
-rw-r--r--drivers/net/mlx4/fw.c28
-rw-r--r--drivers/net/mlx4/fw.h6
-rw-r--r--drivers/net/mlx4/main.c7
-rw-r--r--drivers/net/mlx4/mcg.c17
-rw-r--r--drivers/scsi/ide-scsi.c302
-rw-r--r--drivers/video/fb_ddc.c1
-rw-r--r--drivers/video/intelfb/intelfb_i2c.c12
-rw-r--r--drivers/video/matrox/i2c-matroxfb.c20
-rw-r--r--fs/buffer.c19
-rw-r--r--fs/ext4/balloc.c209
-rw-r--r--fs/ext4/dir.c17
-rw-r--r--fs/ext4/ext4.h61
-rw-r--r--fs/ext4/ext4_extents.h1
-rw-r--r--fs/ext4/ext4_i.h10
-rw-r--r--fs/ext4/ext4_jbd2.h21
-rw-r--r--fs/ext4/ext4_sb.h5
-rw-r--r--fs/ext4/extents.c111
-rw-r--r--fs/ext4/file.c20
-rw-r--r--fs/ext4/fsync.c4
-rw-r--r--fs/ext4/group.h2
-rw-r--r--fs/ext4/ialloc.c113
-rw-r--r--fs/ext4/inode.c1591
-rw-r--r--fs/ext4/mballoc.c451
-rw-r--r--fs/ext4/namei.c45
-rw-r--r--fs/ext4/resize.c52
-rw-r--r--fs/ext4/super.c142
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/ext4/xattr_trusted.c4
-rw-r--r--fs/ext4/xattr_user.c4
-rw-r--r--fs/gfs2/Kconfig18
-rw-r--r--fs/gfs2/Makefile1
-rw-r--r--fs/gfs2/gfs2.h5
-rw-r--r--fs/gfs2/glock.c1643
-rw-r--r--fs/gfs2/glock.h11
-rw-r--r--fs/gfs2/glops.c70
-rw-r--r--fs/gfs2/incore.h38
-rw-r--r--fs/gfs2/inode.c11
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/locking.c52
-rw-r--r--fs/gfs2/locking/dlm/lock.c368
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h18
-rw-r--r--fs/gfs2/locking/dlm/mount.c14
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c13
-rw-r--r--fs/gfs2/locking/dlm/thread.c331
-rw-r--r--fs/gfs2/locking/nolock/Makefile3
-rw-r--r--fs/gfs2/locking/nolock/main.c238
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/main.c2
-rw-r--r--fs/gfs2/meta_io.c14
-rw-r--r--fs/gfs2/meta_io.h1
-rw-r--r--fs/gfs2/ops_address.c40
-rw-r--r--fs/gfs2/ops_file.c38
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/gfs2/ops_inode.c25
-rw-r--r--fs/gfs2/ops_super.c4
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/gfs2/recovery.c5
-rw-r--r--fs/gfs2/rgrp.c108
-rw-r--r--fs/gfs2/super.c4
-rw-r--r--fs/gfs2/sys.c16
-rw-r--r--fs/jbd2/checkpoint.c1
-rw-r--r--fs/jbd2/commit.c294
-rw-r--r--fs/jbd2/journal.c53
-rw-r--r--fs/jbd2/transaction.c365
-rw-r--r--fs/jfs/jfs_debug.c62
-rw-r--r--fs/jfs/jfs_debug.h10
-rw-r--r--fs/jfs/jfs_dtree.h3
-rw-r--r--fs/jfs/jfs_imap.c2
-rw-r--r--fs/jfs/jfs_logmgr.c35
-rw-r--r--fs/jfs/jfs_metapage.c36
-rw-r--r--fs/jfs/jfs_txnmgr.c68
-rw-r--r--fs/jfs/jfs_xtree.c36
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/jfs/super.c7
-rw-r--r--fs/mpage.c14
-rw-r--r--include/asm-x86/dwarf2.h30
-rw-r--r--include/drm/drmP.h1
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/i2c-algo-pcf.h8
-rw-r--r--include/linux/i2c-id.h3
-rw-r--r--include/linux/i2c.h46
-rw-r--r--include/linux/i2c/at24.h28
-rw-r--r--include/linux/ide.h55
-rw-r--r--include/linux/interrupt.h5
-rw-r--r--include/linux/irq.h9
-rw-r--r--include/linux/jbd2.h73
-rw-r--r--include/linux/libata.h55
-rw-r--r--include/linux/lm_interface.h6
-rw-r--r--include/linux/mlx4/device.h3
-rw-r--r--include/linux/mpage.h10
-rw-r--r--include/linux/percpu_counter.h12
-rw-r--r--include/linux/topology.h13
-rw-r--r--include/linux/writeback.h1
-rw-r--r--include/rdma/ib_addr.h43
-rw-r--r--include/rdma/ib_cache.h2
-rw-r--r--include/rdma/ib_cm.h2
-rw-r--r--include/rdma/ib_fmr_pool.h4
-rw-r--r--include/rdma/ib_mad.h17
-rw-r--r--include/rdma/ib_pack.h2
-rw-r--r--include/rdma/ib_sa.h2
-rw-r--r--include/rdma/ib_smi.h4
-rw-r--r--include/rdma/ib_user_cm.h2
-rw-r--r--include/rdma/ib_user_mad.h2
-rw-r--r--include/rdma/ib_user_verbs.h7
-rw-r--r--include/rdma/ib_verbs.h149
-rw-r--r--include/rdma/iw_cm.h2
-rw-r--r--include/rdma/rdma_cm.h52
-rw-r--r--include/rdma/rdma_cm_ib.h50
-rw-r--r--kernel/backtracetest.c65
-rw-r--r--kernel/hrtimer.c7
-rw-r--r--kernel/irq/manage.c33
-rw-r--r--kernel/irq/proc.c59
-rw-r--r--kernel/posix-cpu-timers.c3
-rw-r--r--kernel/stacktrace.c14
-rw-r--r--kernel/time/tick-sched.c8
-rw-r--r--lib/Kconfig.debug4
-rw-r--r--lib/percpu_counter.c7
-rw-r--r--mm/filemap.c3
-rw-r--r--mm/page-writeback.c3
-rw-r--r--security/selinux/hooks.c22
-rw-r--r--security/selinux/include/security.h2
-rw-r--r--security/selinux/ss/services.c27
368 files changed, 11668 insertions, 8240 deletions
diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt
index 938d7dd05490..b4a615b78403 100644
--- a/Documentation/IRQ-affinity.txt
+++ b/Documentation/IRQ-affinity.txt
@@ -1,17 +1,26 @@
1ChangeLog:
2 Started by Ingo Molnar <mingo@redhat.com>
3 Update by Max Krasnyansky <maxk@qualcomm.com>
1 4
2SMP IRQ affinity, started by Ingo Molnar <mingo@redhat.com> 5SMP IRQ affinity
3
4 6
5/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted 7/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted
6for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed 8for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed
7to turn off all CPUs, and if an IRQ controller does not support IRQ 9to turn off all CPUs, and if an IRQ controller does not support IRQ
8affinity then the value will not change from the default 0xffffffff. 10affinity then the value will not change from the default 0xffffffff.
9 11
12/proc/irq/default_smp_affinity specifies default affinity mask that applies
13to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
14will be set to the default mask. It can then be changed as described above.
15Default mask is 0xffffffff.
16
10Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting 17Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting
11the IRQ to CPU4-7 (this is an 8-CPU SMP box): 18it to CPU4-7 (this is an 8-CPU SMP box):
12 19
20[root@moon 44]# cd /proc/irq/44
13[root@moon 44]# cat smp_affinity 21[root@moon 44]# cat smp_affinity
14ffffffff 22ffffffff
23
15[root@moon 44]# echo 0f > smp_affinity 24[root@moon 44]# echo 0f > smp_affinity
16[root@moon 44]# cat smp_affinity 25[root@moon 44]# cat smp_affinity
170000000f 260000000f
@@ -21,17 +30,27 @@ PING hell (195.4.7.3): 56 data bytes
21--- hell ping statistics --- 30--- hell ping statistics ---
226029 packets transmitted, 6027 packets received, 0% packet loss 316029 packets transmitted, 6027 packets received, 0% packet loss
23round-trip min/avg/max = 0.1/0.1/0.4 ms 32round-trip min/avg/max = 0.1/0.1/0.4 ms
24[root@moon 44]# cat /proc/interrupts | grep 44: 33[root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:'
25 44: 0 1785 1785 1783 1783 1 34 CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7
261 0 IO-APIC-level eth1 35 44: 1068 1785 1785 1783 0 0 0 0 IO-APIC-level eth1
36
37As can be seen from the line above IRQ44 was delivered only to the first four
38processors (0-3).
39Now lets restrict that IRQ to CPU(4-7).
40
27[root@moon 44]# echo f0 > smp_affinity 41[root@moon 44]# echo f0 > smp_affinity
42[root@moon 44]# cat smp_affinity
43000000f0
28[root@moon 44]# ping -f h 44[root@moon 44]# ping -f h
29PING hell (195.4.7.3): 56 data bytes 45PING hell (195.4.7.3): 56 data bytes
30.. 46..
31--- hell ping statistics --- 47--- hell ping statistics ---
322779 packets transmitted, 2777 packets received, 0% packet loss 482779 packets transmitted, 2777 packets received, 0% packet loss
33round-trip min/avg/max = 0.1/0.5/585.4 ms 49round-trip min/avg/max = 0.1/0.5/585.4 ms
34[root@moon 44]# cat /proc/interrupts | grep 44: 50[root@moon 44]# cat /proc/interrupts | 'CPU\|44:'
35 44: 1068 1785 1785 1784 1784 1069 1070 1069 IO-APIC-level eth1 51 CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7
36[root@moon 44]# 52 44: 1068 1785 1785 1783 1784 1069 1070 1069 IO-APIC-level eth1
53
54This time around IRQ44 was delivered only to the last four processors.
55i.e counters for the CPU0-3 did not change.
37 56
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index b61cb9564023..bd699da24666 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -14,9 +14,8 @@ represent the thread siblings to cpu X in the same physical package;
14To implement it in an architecture-neutral way, a new source file, 14To implement it in an architecture-neutral way, a new source file,
15drivers/base/topology.c, is to export the 4 attributes. 15drivers/base/topology.c, is to export the 4 attributes.
16 16
17If one architecture wants to support this feature, it just needs to 17For an architecture to support this feature, it must define some of
18implement 4 defines, typically in file include/asm-XXX/topology.h. 18these macros in include/asm-XXX/topology.h:
19The 4 defines are:
20#define topology_physical_package_id(cpu) 19#define topology_physical_package_id(cpu)
21#define topology_core_id(cpu) 20#define topology_core_id(cpu)
22#define topology_thread_siblings(cpu) 21#define topology_thread_siblings(cpu)
@@ -25,17 +24,10 @@ The 4 defines are:
25The type of **_id is int. 24The type of **_id is int.
26The type of siblings is cpumask_t. 25The type of siblings is cpumask_t.
27 26
28To be consistent on all architectures, the 4 attributes should have 27To be consistent on all architectures, include/linux/topology.h
29default values if their values are unavailable. Below is the rule. 28provides default definitions for any of the above macros that are
301) physical_package_id: If cpu has no physical package id, -1 is the 29not defined by include/asm-XXX/topology.h:
31default value. 301) physical_package_id: -1
322) core_id: If cpu doesn't support multi-core, its core id is 0. 312) core_id: 0
333) thread_siblings: Just include itself, if the cpu doesn't support 323) thread_siblings: just the given CPU
34HT/multi-thread. 334) core_siblings: just the given CPU
354) core_siblings: Just include itself, if the cpu doesn't support
36multi-core and HT/Multi-thread.
37
38So be careful when declaring the 4 defines in include/asm-XXX/topology.h.
39
40If an attribute isn't defined on an architecture, it won't be exported.
41
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 46ece3fba6f9..65a1482457a8 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -222,13 +222,6 @@ Who: Thomas Gleixner <tglx@linutronix.de>
222 222
223--------------------------- 223---------------------------
224 224
225What: i2c-i810, i2c-prosavage and i2c-savage4
226When: May 2008
227Why: These drivers are superseded by i810fb, intelfb and savagefb.
228Who: Jean Delvare <khali@linux-fr.org>
229
230---------------------------
231
232What (Why): 225What (Why):
233 - include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files 226 - include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files
234 (superseded by xt_TOS/xt_tos target & match) 227 (superseded by xt_TOS/xt_tos target & match)
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 0c5086db8352..80e193d82e2e 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -13,72 +13,93 @@ Mailing list: linux-ext4@vger.kernel.org
131. Quick usage instructions: 131. Quick usage instructions:
14=========================== 14===========================
15 15
16 - Grab updated e2fsprogs from 16 - Compile and install the latest version of e2fsprogs (as of this
17 ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs-interim/ 17 writing version 1.41) from:
18 This is a patchset on top of e2fsprogs-1.39, which can be found at 18
19 http://sourceforge.net/project/showfiles.php?group_id=2406
20
21 or
22
19 ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/ 23 ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/
20 24
21 - It's still mke2fs -j /dev/hda1 25 or grab the latest git repository from:
26
27 git://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git
28
29 - Create a new filesystem using the ext4dev filesystem type:
30
31 # mke2fs -t ext4dev /dev/hda1
32
33 Or configure an existing ext3 filesystem to support extents and set
34 the test_fs flag to indicate that it's ok for an in-development
35 filesystem to touch this filesystem:
22 36
23 - mount /dev/hda1 /wherever -t ext4dev 37 # tune2fs -O extents -E test_fs /dev/hda1
24 38
25 - To enable extents, 39 If the filesystem was created with 128 byte inodes, it can be
40 converted to use 256 byte for greater efficiency via:
26 41
27 mount /dev/hda1 /wherever -t ext4dev -o extents 42 # tune2fs -I 256 /dev/hda1
28 43
29 - The filesystem is compatible with the ext3 driver until you add a file 44 (Note: we currently do not have tools to convert an ext4dev
30 which has extents (ie: `mount -o extents', then create a file). 45 filesystem back to ext3; so please do not do try this on production
46 filesystems.)
31 47
32 NOTE: The "extents" mount flag is temporary. It will soon go away and 48 - Mounting:
33 extents will be enabled by the "-o extents" flag to mke2fs or tune2fs 49
50 # mount -t ext4dev /dev/hda1 /wherever
34 51
35 - When comparing performance with other filesystems, remember that 52 - When comparing performance with other filesystems, remember that
36 ext3/4 by default offers higher data integrity guarantees than most. So 53 ext3/4 by default offers higher data integrity guarantees than most.
37 when comparing with a metadata-only journalling filesystem, use `mount -o 54 So when comparing with a metadata-only journalling filesystem, such
38 data=writeback'. And you might as well use `mount -o nobh' too along 55 as ext3, use `mount -o data=writeback'. And you might as well use
39 with it. Making the journal larger than the mke2fs default often helps 56 `mount -o nobh' too along with it. Making the journal larger than
40 performance with metadata-intensive workloads. 57 the mke2fs default often helps performance with metadata-intensive
58 workloads.
41 59
422. Features 602. Features
43=========== 61===========
44 62
452.1 Currently available 632.1 Currently available
46 64
47* ability to use filesystems > 16TB 65* ability to use filesystems > 16TB (e2fsprogs support not available yet)
48* extent format reduces metadata overhead (RAM, IO for access, transactions) 66* extent format reduces metadata overhead (RAM, IO for access, transactions)
49* extent format more robust in face of on-disk corruption due to magics, 67* extent format more robust in face of on-disk corruption due to magics,
50* internal redunancy in tree 68* internal redunancy in tree
51 69* improved file allocation (multi-block alloc)
522.1 Previously available, soon to be enabled by default by "mkefs.ext4": 70* fix 32000 subdirectory limit
53 71* nsec timestamps for mtime, atime, ctime, create time
54* dir_index and resize inode will be on by default 72* inode version field on disk (NFSv4, Lustre)
55* large inodes will be used by default for fast EAs, nsec timestamps, etc 73* reduced e2fsck time via uninit_bg feature
74* journal checksumming for robustness, performance
75* persistent file preallocation (e.g for streaming media, databases)
76* ability to pack bitmaps and inode tables into larger virtual groups via the
77 flex_bg feature
78* large file support
79* Inode allocation using large virtual block groups via flex_bg
80* delayed allocation
81* large block (up to pagesize) support
82* efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
83 the ordering)
56 84
572.2 Candidate features for future inclusion 852.2 Candidate features for future inclusion
58 86
59There are several under discussion, whether they all make it in is 87* Online defrag (patches available but not well tested)
60partly a function of how much time everyone has to work on them: 88* reduced mke2fs time via lazy itable initialization in conjuction with
89 the uninit_bg feature (capability to do this is available in e2fsprogs
90 but a kernel thread to do lazy zeroing of unused inode table blocks
91 after filesystem is first mounted is required for safety)
61 92
62* improved file allocation (multi-block alloc, delayed alloc; basically done) 93There are several others under discussion, whether they all make it in is
63* fix 32000 subdirectory limit (patch exists, needs some e2fsck work) 94partly a function of how much time everyone has to work on them. Features like
64* nsec timestamps for mtime, atime, ctime, create time (patch exists, 95metadata checksumming have been discussed and planned for a bit but no patches
65 needs some e2fsck work) 96exist yet so I'm not sure they're in the near-term roadmap.
66* inode version field on disk (NFSv4, Lustre; prototype exists)
67* reduced mke2fs/e2fsck time via uninitialized groups (prototype exists)
68* journal checksumming for robustness, performance (prototype exists)
69* persistent file preallocation (e.g for streaming media, databases)
70 97
71Features like metadata checksumming have been discussed and planned for 98The big performance win will come with mballoc, delalloc and flex_bg
72a bit but no patches exist yet so I'm not sure they're in the near-term 99grouping of bitmaps and inode tables. Some test results available here:
73roadmap.
74 100
75The big performance win will come with mballoc and delalloc. CFS has 101 - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html
76been using mballoc for a few years already with Lustre, and IBM + Bull 102 - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html
77did a lot of benchmarking on it. The reason it isn't in the first set of
78patches is partly a manageability issue, and partly because it doesn't
79directly affect the on-disk format (outside of much better allocation)
80so it isn't critical to get into the first round of changes. I believe
81Alex is working on a new set of patches right now.
82 103
833. Options 1043. Options
84========== 105==========
@@ -222,9 +243,11 @@ stripe=n Number of filesystem blocks that mballoc will try
222 to use for allocation size and alignment. For RAID5/6 243 to use for allocation size and alignment. For RAID5/6
223 systems this should be the number of data 244 systems this should be the number of data
224 disks * RAID chunk size in file system blocks. 245 disks * RAID chunk size in file system blocks.
225 246delalloc (*) Deferring block allocation until write-out time.
247nodelalloc Disable delayed allocation. Blocks are allocation
248 when data is copied from user to page cache.
226Data Mode 249Data Mode
227--------- 250=========
228There are 3 different data modes: 251There are 3 different data modes:
229 252
230* writeback mode 253* writeback mode
@@ -236,10 +259,10 @@ typically provide the best ext4 performance.
236 259
237* ordered mode 260* ordered mode
238In data=ordered mode, ext4 only officially journals metadata, but it logically 261In data=ordered mode, ext4 only officially journals metadata, but it logically
239groups metadata and data blocks into a single unit called a transaction. When 262groups metadata information related to data changes with the data blocks into a
240it's time to write the new metadata out to disk, the associated data blocks 263single unit called a transaction. When it's time to write the new metadata
241are written first. In general, this mode performs slightly slower than 264out to disk, the associated data blocks are written first. In general,
242writeback but significantly faster than journal mode. 265this mode performs slightly slower than writeback but significantly faster than journal mode.
243 266
244* journal mode 267* journal mode
245data=journal mode provides full data and metadata journaling. All new data is 268data=journal mode provides full data and metadata journaling. All new data is
@@ -247,7 +270,8 @@ written to the journal first, and then to its final location.
247In the event of a crash, the journal can be replayed, bringing both data and 270In the event of a crash, the journal can be replayed, bringing both data and
248metadata into a consistent state. This mode is the slowest except when data 271metadata into a consistent state. This mode is the slowest except when data
249needs to be read from and written to disk at the same time where it 272needs to be read from and written to disk at the same time where it
250outperforms all others modes. 273outperforms all others modes. Curently ext4 does not have delayed
274allocation support if this data journalling mode is selected.
251 275
252References 276References
253========== 277==========
@@ -256,7 +280,8 @@ kernel source: <file:fs/ext4/>
256 <file:fs/jbd2/> 280 <file:fs/jbd2/>
257 281
258programs: http://e2fsprogs.sourceforge.net/ 282programs: http://e2fsprogs.sourceforge.net/
259 http://ext2resize.sourceforge.net
260 283
261useful links: http://fedoraproject.org/wiki/ext3-devel 284useful links: http://fedoraproject.org/wiki/ext3-devel
262 http://www.bullopensource.org/ext4/ 285 http://www.bullopensource.org/ext4/
286 http://ext4.wiki.kernel.org/index.php/Main_Page
287 http://fedoraproject.org/wiki/Features/Ext4
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt
new file mode 100644
index 000000000000..4dae9a3840bf
--- /dev/null
+++ b/Documentation/filesystems/gfs2-glocks.txt
@@ -0,0 +1,114 @@
1 Glock internal locking rules
2 ------------------------------
3
4This documents the basic principles of the glock state machine
5internals. Each glock (struct gfs2_glock in fs/gfs2/incore.h)
6has two main (internal) locks:
7
8 1. A spinlock (gl_spin) which protects the internal state such
9 as gl_state, gl_target and the list of holders (gl_holders)
10 2. A non-blocking bit lock, GLF_LOCK, which is used to prevent other
11 threads from making calls to the DLM, etc. at the same time. If a
12 thread takes this lock, it must then call run_queue (usually via the
13 workqueue) when it releases it in order to ensure any pending tasks
14 are completed.
15
16The gl_holders list contains all the queued lock requests (not
17just the holders) associated with the glock. If there are any
18held locks, then they will be contiguous entries at the head
19of the list. Locks are granted in strictly the order that they
20are queued, except for those marked LM_FLAG_PRIORITY which are
21used only during recovery, and even then only for journal locks.
22
23There are three lock states that users of the glock layer can request,
24namely shared (SH), deferred (DF) and exclusive (EX). Those translate
25to the following DLM lock modes:
26
27Glock mode | DLM lock mode
28------------------------------
29 UN | IV/NL Unlocked (no DLM lock associated with glock) or NL
30 SH | PR (Protected read)
31 DF | CW (Concurrent write)
32 EX | EX (Exclusive)
33
34Thus DF is basically a shared mode which is incompatible with the "normal"
35shared lock mode, SH. In GFS2 the DF mode is used exclusively for direct I/O
36operations. The glocks are basically a lock plus some routines which deal
37with cache management. The following rules apply for the cache:
38
39Glock mode | Cache data | Cache Metadata | Dirty Data | Dirty Metadata
40--------------------------------------------------------------------------
41 UN | No | No | No | No
42 SH | Yes | Yes | No | No
43 DF | No | Yes | No | No
44 EX | Yes | Yes | Yes | Yes
45
46These rules are implemented using the various glock operations which
47are defined for each type of glock. Not all types of glocks use
48all the modes. Only inode glocks use the DF mode for example.
49
50Table of glock operations and per type constants:
51
52Field | Purpose
53----------------------------------------------------------------------------
54go_xmote_th | Called before remote state change (e.g. to sync dirty data)
55go_xmote_bh | Called after remote state change (e.g. to refill cache)
56go_inval | Called if remote state change requires invalidating the cache
 57go_demote_ok | Returns boolean value of whether it's ok to demote a glock
58 | (e.g. checks timeout, and that there is no cached data)
59go_lock | Called for the first local holder of a lock
60go_unlock | Called on the final local unlock of a lock
61go_dump | Called to print content of object for debugfs file, or on
62 | error to dump glock to the log.
63go_type; | The type of the glock, LM_TYPE_.....
64go_min_hold_time | The minimum hold time
65
66The minimum hold time for each lock is the time after a remote lock
67grant for which we ignore remote demote requests. This is in order to
68prevent a situation where locks are being bounced around the cluster
69from node to node with none of the nodes making any progress. This
70tends to show up most with shared mmaped files which are being written
71to by multiple nodes. By delaying the demotion in response to a
72remote callback, that gives the userspace program time to make
73some progress before the pages are unmapped.
74
75There is a plan to try and remove the go_lock and go_unlock callbacks
 76if possible, in order to try and speed up the fast path through the locking.
77Also, eventually we hope to make the glock "EX" mode locally shared
78such that any local locking will be done with the i_mutex as required
79rather than via the glock.
80
81Locking rules for glock operations:
82
83Operation | GLF_LOCK bit lock held | gl_spin spinlock held
84-----------------------------------------------------------------
85go_xmote_th | Yes | No
86go_xmote_bh | Yes | No
87go_inval | Yes | No
88go_demote_ok | Sometimes | Yes
89go_lock | Yes | No
90go_unlock | Yes | No
91go_dump | Sometimes | Yes
92
93N.B. Operations must not drop either the bit lock or the spinlock
 94if it's held on entry. go_dump and go_demote_ok must never block.
95Note that go_dump will only be called if the glock's state
96indicates that it is caching uptodate data.
97
98Glock locking order within GFS2:
99
100 1. i_mutex (if required)
101 2. Rename glock (for rename only)
102 3. Inode glock(s)
103 (Parents before children, inodes at "same level" with same parent in
104 lock number order)
105 4. Rgrp glock(s) (for (de)allocation operations)
106 5. Transaction glock (via gfs2_trans_begin) for non-read operations
107 6. Page lock (always last, very important!)
108
109There are two glocks per inode. One deals with access to the inode
110itself (locking order as above), and the other, known as the iopen
111glock is used in conjunction with the i_nlink field in the inode to
112determine the lifetime of the inode in question. Locking of inodes
113is on a per-inode basis. Locking of rgrps is on a per rgrp basis.
114
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index dbc3c6a3650f..7f268f327d75 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -380,28 +380,35 @@ i386 and x86_64 platforms support the new IRQ vector displays.
380Of some interest is the introduction of the /proc/irq directory to 2.4. 380Of some interest is the introduction of the /proc/irq directory to 2.4.
381It could be used to set IRQ to CPU affinity, this means that you can "hook" an 381It could be used to set IRQ to CPU affinity, this means that you can "hook" an
382IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the 382IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the
383irq subdir is one subdir for each IRQ, and one file; prof_cpu_mask 383irq subdir is one subdir for each IRQ, and two files; default_smp_affinity and
384prof_cpu_mask.
384 385
385For example 386For example
386 > ls /proc/irq/ 387 > ls /proc/irq/
387 0 10 12 14 16 18 2 4 6 8 prof_cpu_mask 388 0 10 12 14 16 18 2 4 6 8 prof_cpu_mask
388 1 11 13 15 17 19 3 5 7 9 389 1 11 13 15 17 19 3 5 7 9 default_smp_affinity
389 > ls /proc/irq/0/ 390 > ls /proc/irq/0/
390 smp_affinity 391 smp_affinity
391 392
392The contents of the prof_cpu_mask file and each smp_affinity file for each IRQ 393smp_affinity is a bitmask, in which you can specify which CPUs can handle the
393is the same by default: 394IRQ, you can set it by doing:
394 395
395 > cat /proc/irq/0/smp_affinity 396 > echo 1 > /proc/irq/10/smp_affinity
396 ffffffff 397
398This means that only the first CPU will handle the IRQ, but you can also echo
3995 which means that only the first and fourth CPU can handle the IRQ.
397 400
398It's a bitmask, in which you can specify which CPUs can handle the IRQ, you can 401The contents of each smp_affinity file is the same by default:
399set it by doing: 402
403 > cat /proc/irq/0/smp_affinity
404 ffffffff
400 405
401 > echo 1 > /proc/irq/prof_cpu_mask 406The default_smp_affinity mask applies to all non-active IRQs, which are the
407IRQs which have not yet been allocated/activated, and hence which lack a
408/proc/irq/[0-9]* directory.
402 409
403This means that only the first CPU will handle the IRQ, but you can also echo 5 410prof_cpu_mask specifies which CPUs are to be profiled by the system wide
404which means that only the first and fourth CPU can handle the IRQ. 411profiler. Default value is ffffffff (all cpus).
405 412
406The way IRQs are routed is handled by the IO-APIC, and it's Round Robin 413The way IRQs are routed is handled by the IO-APIC, and it's Round Robin
407between all the CPUs which are allowed to handle it. As usual the kernel has 414between all the CPUs which are allowed to handle it. As usual the kernel has
diff --git a/Documentation/i2c/busses/i2c-i810 b/Documentation/i2c/busses/i2c-i810
deleted file mode 100644
index 778210ee1583..000000000000
--- a/Documentation/i2c/busses/i2c-i810
+++ /dev/null
@@ -1,47 +0,0 @@
1Kernel driver i2c-i810
2
3Supported adapters:
4 * Intel 82810, 82810-DC100, 82810E, and 82815 (GMCH)
5 * Intel 82845G (GMCH)
6
7Authors:
8 Frodo Looijaard <frodol@dds.nl>,
9 Philip Edelbrock <phil@netroedge.com>,
10 Kyösti Mälkki <kmalkki@cc.hut.fi>,
11 Ralph Metzler <rjkm@thp.uni-koeln.de>,
12 Mark D. Studebaker <mdsxyz123@yahoo.com>
13
14Main contact: Mark Studebaker <mdsxyz123@yahoo.com>
15
16Description
17-----------
18
19WARNING: If you have an '810' or '815' motherboard, your standard I2C
20temperature sensors are most likely on the 801's I2C bus. You want the
21i2c-i801 driver for those, not this driver.
22
23Now for the i2c-i810...
24
25The GMCH chip contains two I2C interfaces.
26
27The first interface is used for DDC (Data Display Channel) which is a
28serial channel through the VGA monitor connector to a DDC-compliant
29monitor. This interface is defined by the Video Electronics Standards
30Association (VESA). The standards are available for purchase at
31http://www.vesa.org .
32
33The second interface is a general-purpose I2C bus. It may be connected to a
34TV-out chip such as the BT869 or possibly to a digital flat-panel display.
35
36Features
37--------
38
39Both busses use the i2c-algo-bit driver for 'bit banging'
40and support for specific transactions is provided by i2c-algo-bit.
41
42Issues
43------
44
45If you enable bus testing in i2c-algo-bit (insmod i2c-algo-bit bit_test=1),
46the test may fail; if so, the i2c-i810 driver won't be inserted. However,
47we think this has been fixed.
diff --git a/Documentation/i2c/busses/i2c-prosavage b/Documentation/i2c/busses/i2c-prosavage
deleted file mode 100644
index 703687902511..000000000000
--- a/Documentation/i2c/busses/i2c-prosavage
+++ /dev/null
@@ -1,23 +0,0 @@
1Kernel driver i2c-prosavage
2
3Supported adapters:
4
5 S3/VIA KM266/VT8375 aka ProSavage8
6 S3/VIA KM133/VT8365 aka Savage4
7
8Author: Henk Vergonet <henk@god.dyndns.org>
9
10Description
11-----------
12
13The Savage4 chips contain two I2C interfaces (aka a I2C 'master' or
14'host').
15
16The first interface is used for DDC (Data Display Channel) which is a
17serial channel through the VGA monitor connector to a DDC-compliant
18monitor. This interface is defined by the Video Electronics Standards
19Association (VESA). The standards are available for purchase at
20http://www.vesa.org . The second interface is a general-purpose I2C bus.
21
22Usefull for gaining access to the TV Encoder chips.
23
diff --git a/Documentation/i2c/busses/i2c-savage4 b/Documentation/i2c/busses/i2c-savage4
deleted file mode 100644
index 6ecceab618d3..000000000000
--- a/Documentation/i2c/busses/i2c-savage4
+++ /dev/null
@@ -1,26 +0,0 @@
1Kernel driver i2c-savage4
2
3Supported adapters:
4 * Savage4
5 * Savage2000
6
7Authors:
8 Alexander Wold <awold@bigfoot.com>,
9 Mark D. Studebaker <mdsxyz123@yahoo.com>
10
11Description
12-----------
13
14The Savage4 chips contain two I2C interfaces (aka a I2C 'master'
15or 'host').
16
17The first interface is used for DDC (Data Display Channel) which is a
18serial channel through the VGA monitor connector to a DDC-compliant
19monitor. This interface is defined by the Video Electronics Standards
20Association (VESA). The standards are available for purchase at
21http://www.vesa.org . The DDC bus is not yet supported because its register
22is not directly memory-mapped.
23
24The second interface is a general-purpose I2C bus. This is the only
25interface supported by the driver at the moment.
26
diff --git a/Documentation/i2c/fault-codes b/Documentation/i2c/fault-codes
new file mode 100644
index 000000000000..045765c0b9b5
--- /dev/null
+++ b/Documentation/i2c/fault-codes
@@ -0,0 +1,127 @@
1This is a summary of the most important conventions for use of fault
2codes in the I2C/SMBus stack.
3
4
5A "Fault" is not always an "Error"
6----------------------------------
7Not all fault reports imply errors; "page faults" should be a familiar
8example. Software often retries idempotent operations after transient
9faults. There may be fancier recovery schemes that are appropriate in
10some cases, such as re-initializing (and maybe resetting). After such
11recovery, triggered by a fault report, there is no error.
12
13In a similar way, sometimes a "fault" code just reports one defined
14result for an operation ... it doesn't indicate that anything is wrong
15at all, just that the outcome wasn't on the "golden path".
16
17In short, your I2C driver code may need to know these codes in order
18to respond correctly. Other code may need to rely on YOUR code reporting
19the right fault code, so that it can (in turn) behave correctly.
20
21
22I2C and SMBus fault codes
23-------------------------
24These are returned as negative numbers from most calls, with zero or
25some positive number indicating a non-fault return. The specific
26numbers associated with these symbols differ between architectures,
27though most Linux systems use <asm-generic/errno*.h> numbering.
28
29Note that the descriptions here are not exhaustive. There are other
30codes that may be returned, and other cases where these codes should
31be returned. However, drivers should not return other codes for these
32cases (unless the hardware doesn't provide unique fault reports).
33
34Also, codes returned by adapter probe methods follow rules which are
35specific to their host bus (such as PCI, or the platform bus).
36
37
38EAGAIN
39 Returned by I2C adapters when they lose arbitration in master
40 transmit mode: some other master was transmitting different
41 data at the same time.
42
43 Also returned when trying to invoke an I2C operation in an
44 atomic context, when some task is already using that I2C bus
45 to execute some other operation.
46
47EBADMSG
48 Returned by SMBus logic when an invalid Packet Error Code byte
49 is received. This code is a CRC covering all bytes in the
50 transaction, and is sent before the terminating STOP. This
51 fault is only reported on read transactions; the SMBus slave
52 may have a way to report PEC mismatches on writes from the
53 host. Note that even if PECs are in use, you should not rely
54 on these as the only way to detect incorrect data transfers.
55
56EBUSY
57 Returned by SMBus adapters when the bus was busy for longer
58 than allowed. This usually indicates some device (maybe the
59 SMBus adapter) needs some fault recovery (such as resetting),
60 or that the reset was attempted but failed.
61
62EINVAL
63 This rather vague error means an invalid parameter has been
64 detected before any I/O operation was started. Use a more
65 specific fault code when you can.
66
67 One example would be a driver trying an SMBus Block Write
68 with block size outside the range of 1-32 bytes.
69
70EIO
71 This rather vague error means something went wrong when
72 performing an I/O operation. Use a more specific fault
73 code when you can.
74
75ENODEV
76 Returned by driver probe() methods. This is a bit more
77 specific than ENXIO, implying the problem isn't with the
78 address, but with the device found there. Driver probes
79 may verify the device returns *correct* responses, and
80 return this as appropriate. (The driver core will warn
81 about probe faults other than ENXIO and ENODEV.)
82
83ENOMEM
84 Returned by any component that can't allocate memory when
85 it needs to do so.
86
87ENXIO
88 Returned by I2C adapters to indicate that the address phase
89 of a transfer didn't get an ACK. While it might just mean
90 an I2C device was temporarily not responding, usually it
91 means there's nothing listening at that address.
92
93 Returned by driver probe() methods to indicate that they
94 found no device to bind to. (ENODEV may also be used.)
95
96EOPNOTSUPP
97 Returned by an adapter when asked to perform an operation
98 that it doesn't, or can't, support.
99
100 For example, this would be returned when an adapter that
101 doesn't support SMBus block transfers is asked to execute
102 one. In that case, the driver making that request should
103 have verified that functionality was supported before it
104 made that block transfer request.
105
106 Similarly, if an I2C adapter can't execute all legal I2C
107 messages, it should return this when asked to perform a
108 transaction it can't. (These limitations can't be seen in
109 the adapter's functionality mask, since the assumption is
110 that if an adapter supports I2C it supports all of I2C.)
111
112EPROTO
113 Returned when slave does not conform to the relevant I2C
114 or SMBus (or chip-specific) protocol specifications. One
115 case is when the length of an SMBus block data response
116 (from the SMBus slave) is outside the range 1-32 bytes.
117
118ETIMEDOUT
119 This is returned by drivers when an operation took too much
120 time, and was aborted before it completed.
121
122 SMBus adapters may return it when an operation took more
123 time than allowed by the SMBus specification; for example,
124 when a slave stretches clocks too far. I2C has no such
125 timeouts, but it's normal for I2C adapters to impose some
126 arbitrary limits (much longer than SMBus!) too.
127
diff --git a/Documentation/i2c/smbus-protocol b/Documentation/i2c/smbus-protocol
index 03f08fb491cc..24bfb65da17d 100644
--- a/Documentation/i2c/smbus-protocol
+++ b/Documentation/i2c/smbus-protocol
@@ -42,8 +42,8 @@ Count (8 bits): A data byte containing the length of a block operation.
42[..]: Data sent by I2C device, as opposed to data sent by the host adapter. 42[..]: Data sent by I2C device, as opposed to data sent by the host adapter.
43 43
44 44
45SMBus Quick Command: i2c_smbus_write_quick() 45SMBus Quick Command
46============================================= 46===================
47 47
48This sends a single bit to the device, at the place of the Rd/Wr bit. 48This sends a single bit to the device, at the place of the Rd/Wr bit.
49 49
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index d4cd4126d1ad..6b61b3a2e90b 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -44,6 +44,10 @@ static struct i2c_driver foo_driver = {
44 .id_table = foo_ids, 44 .id_table = foo_ids,
45 .probe = foo_probe, 45 .probe = foo_probe,
46 .remove = foo_remove, 46 .remove = foo_remove,
47 /* if device autodetection is needed: */
48 .class = I2C_CLASS_SOMETHING,
49 .detect = foo_detect,
50 .address_data = &addr_data,
47 51
48 /* else, driver uses "legacy" binding model: */ 52 /* else, driver uses "legacy" binding model: */
49 .attach_adapter = foo_attach_adapter, 53 .attach_adapter = foo_attach_adapter,
@@ -217,6 +221,31 @@ in the I2C bus driver. You may want to save the returned i2c_client
217reference for later use. 221reference for later use.
218 222
219 223
224Device Detection (Standard driver model)
225----------------------------------------
226
227Sometimes you do not know in advance which I2C devices are connected to
228a given I2C bus. This is for example the case of hardware monitoring
229devices on a PC's SMBus. In that case, you may want to let your driver
230detect supported devices automatically. This is how the legacy model
231was working, and is now available as an extension to the standard
232driver model (so that we can finally get rid of the legacy model.)
233
234You simply have to define a detect callback which will attempt to
235identify supported devices (returning 0 for supported ones and -ENODEV
236for unsupported ones), a list of addresses to probe, and a device type
237(or class) so that only I2C buses which may have that type of device
238connected (and not otherwise enumerated) will be probed. The i2c
239core will then call you back as needed and will instantiate a device
240for you for every successful detection.
241
242Note that this mechanism is purely optional and not suitable for all
243devices. You need some reliable way to identify the supported devices
244(typically using device-specific, dedicated identification registers),
 245otherwise misdetections are likely to occur and things can go wrong
246quickly.
247
248
220Device Deletion (Standard driver model) 249Device Deletion (Standard driver model)
221--------------------------------------- 250---------------------------------------
222 251
@@ -569,7 +598,6 @@ SMBus communication
569 in terms of it. Never use this function directly! 598 in terms of it. Never use this function directly!
570 599
571 600
572 extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value);
573 extern s32 i2c_smbus_read_byte(struct i2c_client * client); 601 extern s32 i2c_smbus_read_byte(struct i2c_client * client);
574 extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value); 602 extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value);
575 extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command); 603 extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command);
@@ -578,30 +606,31 @@ SMBus communication
578 extern s32 i2c_smbus_read_word_data(struct i2c_client * client, u8 command); 606 extern s32 i2c_smbus_read_word_data(struct i2c_client * client, u8 command);
579 extern s32 i2c_smbus_write_word_data(struct i2c_client * client, 607 extern s32 i2c_smbus_write_word_data(struct i2c_client * client,
580 u8 command, u16 value); 608 u8 command, u16 value);
609 extern s32 i2c_smbus_read_block_data(struct i2c_client * client,
610 u8 command, u8 *values);
581 extern s32 i2c_smbus_write_block_data(struct i2c_client * client, 611 extern s32 i2c_smbus_write_block_data(struct i2c_client * client,
582 u8 command, u8 length, 612 u8 command, u8 length,
583 u8 *values); 613 u8 *values);
584 extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client, 614 extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client,
585 u8 command, u8 length, u8 *values); 615 u8 command, u8 length, u8 *values);
586
587These ones were removed in Linux 2.6.10 because they had no users, but could
588be added back later if needed:
589
590 extern s32 i2c_smbus_read_block_data(struct i2c_client * client,
591 u8 command, u8 *values);
592 extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client, 616 extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client,
593 u8 command, u8 length, 617 u8 command, u8 length,
594 u8 *values); 618 u8 *values);
619
620These ones were removed from i2c-core because they had no users, but could
621be added back later if needed:
622
623 extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value);
595 extern s32 i2c_smbus_process_call(struct i2c_client * client, 624 extern s32 i2c_smbus_process_call(struct i2c_client * client,
596 u8 command, u16 value); 625 u8 command, u16 value);
597 extern s32 i2c_smbus_block_process_call(struct i2c_client *client, 626 extern s32 i2c_smbus_block_process_call(struct i2c_client *client,
598 u8 command, u8 length, 627 u8 command, u8 length,
599 u8 *values) 628 u8 *values)
600 629
601All these transactions return -1 on failure. The 'write' transactions 630All these transactions return a negative errno value on failure. The 'write'
602return 0 on success; the 'read' transactions return the read value, except 631transactions return 0 on success; the 'read' transactions return the read
603for read_block, which returns the number of values read. The block buffers 632value, except for block transactions, which return the number of values
604need not be longer than 32 bytes. 633read. The block buffers need not be longer than 32 bytes.
605 634
606You can read the file `smbus-protocol' for more information about the 635You can read the file `smbus-protocol' for more information about the
607actual SMBus protocol. 636actual SMBus protocol.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b3a5aad7e629..312fe77764a4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -571,6 +571,8 @@ and is between 256 and 4096 characters. It is defined in the file
571 571
572 debug_objects [KNL] Enable object debugging 572 debug_objects [KNL] Enable object debugging
573 573
574 debugpat [X86] Enable PAT debugging
575
574 decnet.addr= [HW,NET] 576 decnet.addr= [HW,NET]
575 Format: <area>[,<node>] 577 Format: <area>[,<node>]
576 See also Documentation/networking/decnet.txt. 578 See also Documentation/networking/decnet.txt.
@@ -756,9 +758,6 @@ and is between 256 and 4096 characters. It is defined in the file
756 hd= [EIDE] (E)IDE hard drive subsystem geometry 758 hd= [EIDE] (E)IDE hard drive subsystem geometry
757 Format: <cyl>,<head>,<sect> 759 Format: <cyl>,<head>,<sect>
758 760
759 hd?= [HW] (E)IDE subsystem
760 hd?lun= See Documentation/ide/ide.txt.
761
762 highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact 761 highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact
763 size of <nn>. This works even on boxes that have no 762 size of <nn>. This works even on boxes that have no
764 highmem otherwise. This also works to reduce highmem 763 highmem otherwise. This also works to reduce highmem
@@ -1610,6 +1609,10 @@ and is between 256 and 4096 characters. It is defined in the file
1610 Format: { parport<nr> | timid | 0 } 1609 Format: { parport<nr> | timid | 0 }
1611 See also Documentation/parport.txt. 1610 See also Documentation/parport.txt.
1612 1611
1612 pmtmr= [X86] Manual setup of pmtmr I/O Port.
1613 Override pmtimer IOPort with a hex value.
1614 e.g. pmtmr=0x508
1615
1613 pnpacpi= [ACPI] 1616 pnpacpi= [ACPI]
1614 { off } 1617 { off }
1615 1618
diff --git a/MAINTAINERS b/MAINTAINERS
index 1528e58b5408..6198fa3deb99 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1686,6 +1686,13 @@ L: linuxppc-embedded@ozlabs.org
1686L: linux-kernel@vger.kernel.org 1686L: linux-kernel@vger.kernel.org
1687S: Maintained 1687S: Maintained
1688 1688
1689FREESCALE I2C CPM DRIVER
1690P: Jochen Friedrich
1691M: jochen@scram.de
1692L: linuxppc-dev@ozlabs.org
1693L: i2c@lm-sensors.org
1694S: Maintained
1695
1689FREESCALE SOC FS_ENET DRIVER 1696FREESCALE SOC FS_ENET DRIVER
1690P: Pantelis Antoniou 1697P: Pantelis Antoniou
1691M: pantelis.antoniou@gmail.com 1698M: pantelis.antoniou@gmail.com
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index facf82a5499a..c626a821cdcb 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -42,8 +42,7 @@ void ack_bad_irq(unsigned int irq)
42#ifdef CONFIG_SMP 42#ifdef CONFIG_SMP
43static char irq_user_affinity[NR_IRQS]; 43static char irq_user_affinity[NR_IRQS];
44 44
45int 45int irq_select_affinity(unsigned int irq)
46select_smp_affinity(unsigned int irq)
47{ 46{
48 static int last_cpu; 47 static int last_cpu;
49 int cpu = last_cpu + 1; 48 int cpu = last_cpu + 1;
@@ -51,7 +50,7 @@ select_smp_affinity(unsigned int irq)
51 if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq]) 50 if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq])
52 return 1; 51 return 1;
53 52
54 while (!cpu_possible(cpu)) 53 while (!cpu_possible(cpu) || !cpu_isset(cpu, irq_default_affinity))
55 cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); 54 cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
56 last_cpu = cpu; 55 last_cpu = cpu;
57 56
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index 90e0c35ae60d..fc650f64df43 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -92,4 +92,5 @@ void save_stack_trace(struct stack_trace *trace)
92{ 92{
93 save_stack_trace_tsk(current, trace); 93 save_stack_trace_tsk(current, trace);
94} 94}
95EXPORT_SYMBOL_GPL(save_stack_trace);
95#endif 96#endif
diff --git a/arch/avr32/kernel/stacktrace.c b/arch/avr32/kernel/stacktrace.c
index 9a68190bbffd..f4bdb448049c 100644
--- a/arch/avr32/kernel/stacktrace.c
+++ b/arch/avr32/kernel/stacktrace.c
@@ -51,3 +51,4 @@ void save_stack_trace(struct stack_trace *trace)
51 fp = frame->fp; 51 fp = frame->fp;
52 } 52 }
53} 53}
54EXPORT_SYMBOL_GPL(save_stack_trace);
diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c
index ebd9db8d1ece..5eb4681a73d2 100644
--- a/arch/mips/kernel/stacktrace.c
+++ b/arch/mips/kernel/stacktrace.c
@@ -73,3 +73,4 @@ void save_stack_trace(struct stack_trace *trace)
73 prepare_frametrace(regs); 73 prepare_frametrace(regs);
74 save_context_stack(trace, regs); 74 save_context_stack(trace, regs);
75} 75}
76EXPORT_SYMBOL_GPL(save_stack_trace);
diff --git a/arch/mips/sibyte/swarm/Makefile b/arch/mips/sibyte/swarm/Makefile
index 1775755a2619..255d692bfa18 100644
--- a/arch/mips/sibyte/swarm/Makefile
+++ b/arch/mips/sibyte/swarm/Makefile
@@ -1,3 +1,4 @@
1obj-y := setup.o rtc_xicor1241.o rtc_m41t81.o 1obj-y := setup.o rtc_xicor1241.o rtc_m41t81.o
2 2
3obj-$(CONFIG_I2C_BOARDINFO) += swarm-i2c.o
3obj-$(CONFIG_KGDB) += dbg_io.o 4obj-$(CONFIG_KGDB) += dbg_io.o
diff --git a/arch/mips/sibyte/swarm/swarm-i2c.c b/arch/mips/sibyte/swarm/swarm-i2c.c
new file mode 100644
index 000000000000..4282ac9d01d2
--- /dev/null
+++ b/arch/mips/sibyte/swarm/swarm-i2c.c
@@ -0,0 +1,37 @@
1/*
2 * arch/mips/sibyte/swarm/swarm-i2c.c
3 *
4 * Broadcom BCM91250A (SWARM), etc. I2C platform setup.
5 *
6 * Copyright (c) 2008 Maciej W. Rozycki
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#include <linux/i2c.h>
15#include <linux/init.h>
16#include <linux/kernel.h>
17
18
19static struct i2c_board_info swarm_i2c_info1[] __initdata = {
20 {
21 I2C_BOARD_INFO("m41t81", 0x68),
22 },
23};
24
25static int __init swarm_i2c_init(void)
26{
27 int err;
28
29 err = i2c_register_board_info(1, swarm_i2c_info1,
30 ARRAY_SIZE(swarm_i2c_info1));
31 if (err < 0)
32 printk(KERN_ERR
33 "swarm-i2c: cannot register board I2C devices\n");
34 return err;
35}
36
37arch_initcall(swarm_i2c_init);
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 962944038430..3cf0d94ba340 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -12,6 +12,7 @@
12 12
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/stacktrace.h> 14#include <linux/stacktrace.h>
15#include <linux/module.h>
15#include <asm/ptrace.h> 16#include <asm/ptrace.h>
16 17
17/* 18/*
@@ -44,3 +45,4 @@ void save_stack_trace(struct stack_trace *trace)
44 sp = newsp; 45 sp = newsp;
45 } 46 }
46} 47}
48EXPORT_SYMBOL_GPL(save_stack_trace);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 85e46a5d0e08..57571f10270c 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -81,6 +81,7 @@ void save_stack_trace(struct stack_trace *trace)
81 S390_lowcore.thread_info, 81 S390_lowcore.thread_info,
82 S390_lowcore.thread_info + THREAD_SIZE, 1); 82 S390_lowcore.thread_info + THREAD_SIZE, 1);
83} 83}
84EXPORT_SYMBOL_GPL(save_stack_trace);
84 85
85void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 86void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
86{ 87{
@@ -93,3 +94,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
93 if (trace->nr_entries < trace->max_entries) 94 if (trace->nr_entries < trace->max_entries)
94 trace->entries[trace->nr_entries++] = ULONG_MAX; 95 trace->entries[trace->nr_entries++] = ULONG_MAX;
95} 96}
97EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c
index d41e561be20e..1b2ae35c4a76 100644
--- a/arch/sh/kernel/stacktrace.c
+++ b/arch/sh/kernel/stacktrace.c
@@ -34,3 +34,4 @@ void save_stack_trace(struct stack_trace *trace)
34 } 34 }
35 } 35 }
36} 36}
37EXPORT_SYMBOL_GPL(save_stack_trace);
diff --git a/arch/sparc64/kernel/stacktrace.c b/arch/sparc64/kernel/stacktrace.c
index c73ce3f4197e..b3e3737750d8 100644
--- a/arch/sparc64/kernel/stacktrace.c
+++ b/arch/sparc64/kernel/stacktrace.c
@@ -1,6 +1,7 @@
1#include <linux/sched.h> 1#include <linux/sched.h>
2#include <linux/stacktrace.h> 2#include <linux/stacktrace.h>
3#include <linux/thread_info.h> 3#include <linux/thread_info.h>
4#include <linux/module.h>
4#include <asm/ptrace.h> 5#include <asm/ptrace.h>
5#include <asm/stacktrace.h> 6#include <asm/stacktrace.h>
6 7
@@ -47,3 +48,4 @@ void save_stack_trace(struct stack_trace *trace)
47 trace->entries[trace->nr_entries++] = pc; 48 trace->entries[trace->nr_entries++] = pc;
48 } while (trace->nr_entries < trace->max_entries); 49 } while (trace->nr_entries < trace->max_entries);
49} 50}
51EXPORT_SYMBOL_GPL(save_stack_trace);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6958d6bcaf70..2642b4bf41b9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -447,7 +447,6 @@ config PARAVIRT_DEBUG
447config MEMTEST 447config MEMTEST
448 bool "Memtest" 448 bool "Memtest"
449 depends on X86_64 449 depends on X86_64
450 default y
451 help 450 help
452 This option adds a kernel parameter 'memtest', which allows memtest 451 This option adds a kernel parameter 'memtest', which allows memtest
453 to be set. 452 to be set.
@@ -455,7 +454,7 @@ config MEMTEST
455 memtest=1, mean do 1 test pattern; 454 memtest=1, mean do 1 test pattern;
456 ... 455 ...
457 memtest=4, mean do 4 test patterns. 456 memtest=4, mean do 4 test patterns.
458 If you are unsure how to answer this question, answer Y. 457 If you are unsure how to answer this question, answer N.
459 458
460config X86_SUMMIT_NUMA 459config X86_SUMMIT_NUMA
461 def_bool y 460 def_bool y
@@ -1135,21 +1134,18 @@ config MTRR
1135 See <file:Documentation/mtrr.txt> for more information. 1134 See <file:Documentation/mtrr.txt> for more information.
1136 1135
1137config MTRR_SANITIZER 1136config MTRR_SANITIZER
1138 def_bool y 1137 bool
1139 prompt "MTRR cleanup support" 1138 prompt "MTRR cleanup support"
1140 depends on MTRR 1139 depends on MTRR
1141 help 1140 help
1142 Convert MTRR layout from continuous to discrete, so some X driver 1141 Convert MTRR layout from continuous to discrete, so X drivers can
1143 could add WB entries. 1142 add writeback entries.
1144 1143
1145 Say N here if you see bootup problems (boot crash, boot hang, 1144 Can be disabled with disable_mtrr_cleanup on the kernel command line.
1146 spontaneous reboots). 1145 The largest mtrr entry size for a continous block can be set with
1146 mtrr_chunk_size.
1147 1147
1148 Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size 1148 If unsure, say N.
1149 could be used to send largest mtrr entry size for continuous block
1150 to hold holes (aka. UC entries)
1151
1152 If unsure, say Y.
1153 1149
1154config MTRR_SANITIZER_ENABLE_DEFAULT 1150config MTRR_SANITIZER_ENABLE_DEFAULT
1155 int "MTRR cleanup enable value (0-1)" 1151 int "MTRR cleanup enable value (0-1)"
@@ -1166,7 +1162,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
1166 depends on MTRR_SANITIZER 1162 depends on MTRR_SANITIZER
1167 help 1163 help
1168 mtrr cleanup spare entries default, it can be changed via 1164 mtrr cleanup spare entries default, it can be changed via
1169 mtrr_spare_reg_nr= 1165 mtrr_spare_reg_nr=N on the kernel command line.
1170 1166
1171config X86_PAT 1167config X86_PAT
1172 bool 1168 bool
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index e6a4b564ccaa..793ad2045f58 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -23,6 +23,15 @@ static unsigned long acpi_realmode;
23static char temp_stack[10240]; 23static char temp_stack[10240];
24#endif 24#endif
25 25
26/* XXX: this macro should move to asm-x86/segment.h and be shared with the
27 boot code... */
28#define GDT_ENTRY(flags, base, limit) \
29 (((u64)(base & 0xff000000) << 32) | \
30 ((u64)flags << 40) | \
31 ((u64)(limit & 0x00ff0000) << 32) | \
32 ((u64)(base & 0x00ffffff) << 16) | \
33 ((u64)(limit & 0x0000ffff)))
34
26/** 35/**
27 * acpi_save_state_mem - save kernel state 36 * acpi_save_state_mem - save kernel state
28 * 37 *
@@ -51,18 +60,27 @@ int acpi_save_state_mem(void)
51 header->video_mode = saved_video_mode; 60 header->video_mode = saved_video_mode;
52 61
53 header->wakeup_jmp_seg = acpi_wakeup_address >> 4; 62 header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
63
64 /*
65 * Set up the wakeup GDT. We set these up as Big Real Mode,
66 * that is, with limits set to 4 GB. At least the Lenovo
67 * Thinkpad X61 is known to need this for the video BIOS
68 * initialization quirk to work; this is likely to also
69 * be the case for other laptops or integrated video devices.
70 */
71
54 /* GDT[0]: GDT self-pointer */ 72 /* GDT[0]: GDT self-pointer */
55 header->wakeup_gdt[0] = 73 header->wakeup_gdt[0] =
56 (u64)(sizeof(header->wakeup_gdt) - 1) + 74 (u64)(sizeof(header->wakeup_gdt) - 1) +
57 ((u64)(acpi_wakeup_address + 75 ((u64)(acpi_wakeup_address +
58 ((char *)&header->wakeup_gdt - (char *)acpi_realmode)) 76 ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
59 << 16); 77 << 16);
60 /* GDT[1]: real-mode-like code segment */ 78 /* GDT[1]: big real mode-like code segment */
61 header->wakeup_gdt[1] = (0x009bULL << 40) + 79 header->wakeup_gdt[1] =
62 ((u64)acpi_wakeup_address << 16) + 0xffff; 80 GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
63 /* GDT[2]: real-mode-like data segment */ 81 /* GDT[2]: big real mode-like data segment */
64 header->wakeup_gdt[2] = (0x0093ULL << 40) + 82 header->wakeup_gdt[2] =
65 ((u64)acpi_wakeup_address << 16) + 0xffff; 83 GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
66 84
67#ifndef CONFIG_64BIT 85#ifndef CONFIG_64BIT
68 store_gdt((struct desc_ptr *)&header->pmode_gdt); 86 store_gdt((struct desc_ptr *)&header->pmode_gdt);
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 79bdcd11c66e..d13858818100 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -266,6 +266,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
266 hpet_print_force_info(); 266 hpet_print_force_info();
267} 267}
268 268
269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
270 old_ich_force_enable_hpet_user);
269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, 271DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
270 old_ich_force_enable_hpet_user); 272 old_ich_force_enable_hpet_user);
271DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, 273DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index c28c342c162f..a03e7f6d90c3 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -74,6 +74,7 @@ void save_stack_trace(struct stack_trace *trace)
74 if (trace->nr_entries < trace->max_entries) 74 if (trace->nr_entries < trace->max_entries)
75 trace->entries[trace->nr_entries++] = ULONG_MAX; 75 trace->entries[trace->nr_entries++] = ULONG_MAX;
76} 76}
77EXPORT_SYMBOL_GPL(save_stack_trace);
77 78
78void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 79void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
79{ 80{
@@ -81,3 +82,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
81 if (trace->nr_entries < trace->max_entries) 82 if (trace->nr_entries < trace->max_entries)
82 trace->entries[trace->nr_entries++] = ULONG_MAX; 83 trace->entries[trace->nr_entries++] = ULONG_MAX;
83} 84}
85EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/block/blk-core.c b/block/blk-core.c
index c6e536597c8a..fef79ccb2a11 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1042,15 +1042,9 @@ void blk_put_request(struct request *req)
1042 unsigned long flags; 1042 unsigned long flags;
1043 struct request_queue *q = req->q; 1043 struct request_queue *q = req->q;
1044 1044
1045 /* 1045 spin_lock_irqsave(q->queue_lock, flags);
1046 * Gee, IDE calls in w/ NULL q. Fix IDE and remove the 1046 __blk_put_request(q, req);
1047 * following if (q) test. 1047 spin_unlock_irqrestore(q->queue_lock, flags);
1048 */
1049 if (q) {
1050 spin_lock_irqsave(q->queue_lock, flags);
1051 __blk_put_request(q, req);
1052 spin_unlock_irqrestore(q->queue_lock, flags);
1053 }
1054} 1048}
1055EXPORT_SYMBOL(blk_put_request); 1049EXPORT_SYMBOL(blk_put_request);
1056 1050
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 391dd6224890..9bceff7674f2 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -18,7 +18,7 @@
18 * @rq: request to complete 18 * @rq: request to complete
19 * @error: end io status of the request 19 * @error: end io status of the request
20 */ 20 */
21void blk_end_sync_rq(struct request *rq, int error) 21static void blk_end_sync_rq(struct request *rq, int error)
22{ 22{
23 struct completion *waiting = rq->end_io_data; 23 struct completion *waiting = rq->end_io_data;
24 24
@@ -31,7 +31,6 @@ void blk_end_sync_rq(struct request *rq, int error)
31 */ 31 */
32 complete(waiting); 32 complete(waiting);
33} 33}
34EXPORT_SYMBOL(blk_end_sync_rq);
35 34
36/** 35/**
37 * blk_execute_rq_nowait - insert a request into queue for execution 36 * blk_execute_rq_nowait - insert a request into queue for execution
@@ -58,6 +57,9 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
58 spin_lock_irq(q->queue_lock); 57 spin_lock_irq(q->queue_lock);
59 __elv_add_request(q, rq, where, 1); 58 __elv_add_request(q, rq, where, 1);
60 __generic_unplug_device(q); 59 __generic_unplug_device(q);
60 /* the queue is stopped so it won't be plugged+unplugged */
61 if (blk_pm_resume_request(rq))
62 q->request_fn(q);
61 spin_unlock_irq(q->queue_lock); 63 spin_unlock_irq(q->queue_lock);
62} 64}
63EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); 65EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 5e6468a7ca4b..dc7596f028b6 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -56,6 +56,12 @@ MODULE_PARM_DESC(skip_host_reset, "skip global host reset (0=don't skip, 1=skip)
56static int ahci_enable_alpm(struct ata_port *ap, 56static int ahci_enable_alpm(struct ata_port *ap,
57 enum link_pm policy); 57 enum link_pm policy);
58static void ahci_disable_alpm(struct ata_port *ap); 58static void ahci_disable_alpm(struct ata_port *ap);
59static ssize_t ahci_led_show(struct ata_port *ap, char *buf);
60static ssize_t ahci_led_store(struct ata_port *ap, const char *buf,
61 size_t size);
62static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
63 ssize_t size);
64#define MAX_SLOTS 8
59 65
60enum { 66enum {
61 AHCI_PCI_BAR = 5, 67 AHCI_PCI_BAR = 5,
@@ -98,6 +104,8 @@ enum {
98 HOST_IRQ_STAT = 0x08, /* interrupt status */ 104 HOST_IRQ_STAT = 0x08, /* interrupt status */
99 HOST_PORTS_IMPL = 0x0c, /* bitmap of implemented ports */ 105 HOST_PORTS_IMPL = 0x0c, /* bitmap of implemented ports */
100 HOST_VERSION = 0x10, /* AHCI spec. version compliancy */ 106 HOST_VERSION = 0x10, /* AHCI spec. version compliancy */
107 HOST_EM_LOC = 0x1c, /* Enclosure Management location */
108 HOST_EM_CTL = 0x20, /* Enclosure Management Control */
101 109
102 /* HOST_CTL bits */ 110 /* HOST_CTL bits */
103 HOST_RESET = (1 << 0), /* reset controller; self-clear */ 111 HOST_RESET = (1 << 0), /* reset controller; self-clear */
@@ -105,6 +113,7 @@ enum {
105 HOST_AHCI_EN = (1 << 31), /* AHCI enabled */ 113 HOST_AHCI_EN = (1 << 31), /* AHCI enabled */
106 114
107 /* HOST_CAP bits */ 115 /* HOST_CAP bits */
116 HOST_CAP_EMS = (1 << 6), /* Enclosure Management support */
108 HOST_CAP_SSC = (1 << 14), /* Slumber capable */ 117 HOST_CAP_SSC = (1 << 14), /* Slumber capable */
109 HOST_CAP_PMP = (1 << 17), /* Port Multiplier support */ 118 HOST_CAP_PMP = (1 << 17), /* Port Multiplier support */
110 HOST_CAP_CLO = (1 << 24), /* Command List Override support */ 119 HOST_CAP_CLO = (1 << 24), /* Command List Override support */
@@ -202,6 +211,11 @@ enum {
202 ATA_FLAG_IPM, 211 ATA_FLAG_IPM,
203 212
204 ICH_MAP = 0x90, /* ICH MAP register */ 213 ICH_MAP = 0x90, /* ICH MAP register */
214
215 /* em_ctl bits */
216 EM_CTL_RST = (1 << 9), /* Reset */
217 EM_CTL_TM = (1 << 8), /* Transmit Message */
218 EM_CTL_ALHD = (1 << 26), /* Activity LED */
205}; 219};
206 220
207struct ahci_cmd_hdr { 221struct ahci_cmd_hdr {
@@ -219,12 +233,21 @@ struct ahci_sg {
219 __le32 flags_size; 233 __le32 flags_size;
220}; 234};
221 235
236struct ahci_em_priv {
237 enum sw_activity blink_policy;
238 struct timer_list timer;
239 unsigned long saved_activity;
240 unsigned long activity;
241 unsigned long led_state;
242};
243
222struct ahci_host_priv { 244struct ahci_host_priv {
223 unsigned int flags; /* AHCI_HFLAG_* */ 245 unsigned int flags; /* AHCI_HFLAG_* */
224 u32 cap; /* cap to use */ 246 u32 cap; /* cap to use */
225 u32 port_map; /* port map to use */ 247 u32 port_map; /* port map to use */
226 u32 saved_cap; /* saved initial cap */ 248 u32 saved_cap; /* saved initial cap */
227 u32 saved_port_map; /* saved initial port_map */ 249 u32 saved_port_map; /* saved initial port_map */
250 u32 em_loc; /* enclosure management location */
228}; 251};
229 252
230struct ahci_port_priv { 253struct ahci_port_priv {
@@ -240,6 +263,8 @@ struct ahci_port_priv {
240 unsigned int ncq_saw_dmas:1; 263 unsigned int ncq_saw_dmas:1;
241 unsigned int ncq_saw_sdb:1; 264 unsigned int ncq_saw_sdb:1;
242 u32 intr_mask; /* interrupts to enable */ 265 u32 intr_mask; /* interrupts to enable */
266 struct ahci_em_priv em_priv[MAX_SLOTS];/* enclosure management info
267 * per PM slot */
243}; 268};
244 269
245static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val); 270static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
@@ -277,9 +302,20 @@ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg);
277static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg); 302static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg);
278static int ahci_pci_device_resume(struct pci_dev *pdev); 303static int ahci_pci_device_resume(struct pci_dev *pdev);
279#endif 304#endif
305static ssize_t ahci_activity_show(struct ata_device *dev, char *buf);
306static ssize_t ahci_activity_store(struct ata_device *dev,
307 enum sw_activity val);
308static void ahci_init_sw_activity(struct ata_link *link);
280 309
281static struct device_attribute *ahci_shost_attrs[] = { 310static struct device_attribute *ahci_shost_attrs[] = {
282 &dev_attr_link_power_management_policy, 311 &dev_attr_link_power_management_policy,
312 &dev_attr_em_message_type,
313 &dev_attr_em_message,
314 NULL
315};
316
317static struct device_attribute *ahci_sdev_attrs[] = {
318 &dev_attr_sw_activity,
283 NULL 319 NULL
284}; 320};
285 321
@@ -289,6 +325,7 @@ static struct scsi_host_template ahci_sht = {
289 .sg_tablesize = AHCI_MAX_SG, 325 .sg_tablesize = AHCI_MAX_SG,
290 .dma_boundary = AHCI_DMA_BOUNDARY, 326 .dma_boundary = AHCI_DMA_BOUNDARY,
291 .shost_attrs = ahci_shost_attrs, 327 .shost_attrs = ahci_shost_attrs,
328 .sdev_attrs = ahci_sdev_attrs,
292}; 329};
293 330
294static struct ata_port_operations ahci_ops = { 331static struct ata_port_operations ahci_ops = {
@@ -316,6 +353,10 @@ static struct ata_port_operations ahci_ops = {
316 353
317 .enable_pm = ahci_enable_alpm, 354 .enable_pm = ahci_enable_alpm,
318 .disable_pm = ahci_disable_alpm, 355 .disable_pm = ahci_disable_alpm,
356 .em_show = ahci_led_show,
357 .em_store = ahci_led_store,
358 .sw_activity_show = ahci_activity_show,
359 .sw_activity_store = ahci_activity_store,
319#ifdef CONFIG_PM 360#ifdef CONFIG_PM
320 .port_suspend = ahci_port_suspend, 361 .port_suspend = ahci_port_suspend,
321 .port_resume = ahci_port_resume, 362 .port_resume = ahci_port_resume,
@@ -561,6 +602,11 @@ static struct pci_driver ahci_pci_driver = {
561#endif 602#endif
562}; 603};
563 604
605static int ahci_em_messages = 1;
606module_param(ahci_em_messages, int, 0444);
607/* add other LED protocol types when they become supported */
608MODULE_PARM_DESC(ahci_em_messages,
609 "Set AHCI Enclosure Management Message type (0 = disabled, 1 = LED");
564 610
565static inline int ahci_nr_ports(u32 cap) 611static inline int ahci_nr_ports(u32 cap)
566{ 612{
@@ -1031,11 +1077,28 @@ static void ahci_power_down(struct ata_port *ap)
1031 1077
1032static void ahci_start_port(struct ata_port *ap) 1078static void ahci_start_port(struct ata_port *ap)
1033{ 1079{
1080 struct ahci_port_priv *pp = ap->private_data;
1081 struct ata_link *link;
1082 struct ahci_em_priv *emp;
1083
1034 /* enable FIS reception */ 1084 /* enable FIS reception */
1035 ahci_start_fis_rx(ap); 1085 ahci_start_fis_rx(ap);
1036 1086
1037 /* enable DMA */ 1087 /* enable DMA */
1038 ahci_start_engine(ap); 1088 ahci_start_engine(ap);
1089
1090 /* turn on LEDs */
1091 if (ap->flags & ATA_FLAG_EM) {
1092 ata_port_for_each_link(link, ap) {
1093 emp = &pp->em_priv[link->pmp];
1094 ahci_transmit_led_message(ap, emp->led_state, 4);
1095 }
1096 }
1097
1098 if (ap->flags & ATA_FLAG_SW_ACTIVITY)
1099 ata_port_for_each_link(link, ap)
1100 ahci_init_sw_activity(link);
1101
1039} 1102}
1040 1103
1041static int ahci_deinit_port(struct ata_port *ap, const char **emsg) 1104static int ahci_deinit_port(struct ata_port *ap, const char **emsg)
@@ -1079,12 +1142,15 @@ static int ahci_reset_controller(struct ata_host *host)
1079 readl(mmio + HOST_CTL); /* flush */ 1142 readl(mmio + HOST_CTL); /* flush */
1080 } 1143 }
1081 1144
1082 /* reset must complete within 1 second, or 1145 /*
1146 * to perform host reset, OS should set HOST_RESET
1147 * and poll until this bit is read to be "0".
1148 * reset must complete within 1 second, or
1083 * the hardware should be considered fried. 1149 * the hardware should be considered fried.
1084 */ 1150 */
1085 ssleep(1); 1151 tmp = ata_wait_register(mmio + HOST_CTL, HOST_RESET,
1152 HOST_RESET, 10, 1000);
1086 1153
1087 tmp = readl(mmio + HOST_CTL);
1088 if (tmp & HOST_RESET) { 1154 if (tmp & HOST_RESET) {
1089 dev_printk(KERN_ERR, host->dev, 1155 dev_printk(KERN_ERR, host->dev,
1090 "controller reset failed (0x%x)\n", tmp); 1156 "controller reset failed (0x%x)\n", tmp);
@@ -1116,6 +1182,230 @@ static int ahci_reset_controller(struct ata_host *host)
1116 return 0; 1182 return 0;
1117} 1183}
1118 1184
1185static void ahci_sw_activity(struct ata_link *link)
1186{
1187 struct ata_port *ap = link->ap;
1188 struct ahci_port_priv *pp = ap->private_data;
1189 struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
1190
1191 if (!(link->flags & ATA_LFLAG_SW_ACTIVITY))
1192 return;
1193
1194 emp->activity++;
1195 if (!timer_pending(&emp->timer))
1196 mod_timer(&emp->timer, jiffies + msecs_to_jiffies(10));
1197}
1198
1199static void ahci_sw_activity_blink(unsigned long arg)
1200{
1201 struct ata_link *link = (struct ata_link *)arg;
1202 struct ata_port *ap = link->ap;
1203 struct ahci_port_priv *pp = ap->private_data;
1204 struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
1205 unsigned long led_message = emp->led_state;
1206 u32 activity_led_state;
1207
1208 led_message &= 0xffff0000;
1209 led_message |= ap->port_no | (link->pmp << 8);
1210
1211 /* check to see if we've had activity. If so,
1212 * toggle state of LED and reset timer. If not,
1213 * turn LED to desired idle state.
1214 */
1215 if (emp->saved_activity != emp->activity) {
1216 emp->saved_activity = emp->activity;
1217 /* get the current LED state */
1218 activity_led_state = led_message & 0x00010000;
1219
1220 if (activity_led_state)
1221 activity_led_state = 0;
1222 else
1223 activity_led_state = 1;
1224
1225 /* clear old state */
1226 led_message &= 0xfff8ffff;
1227
1228 /* toggle state */
1229 led_message |= (activity_led_state << 16);
1230 mod_timer(&emp->timer, jiffies + msecs_to_jiffies(100));
1231 } else {
1232 /* switch to idle */
1233 led_message &= 0xfff8ffff;
1234 if (emp->blink_policy == BLINK_OFF)
1235 led_message |= (1 << 16);
1236 }
1237 ahci_transmit_led_message(ap, led_message, 4);
1238}
1239
1240static void ahci_init_sw_activity(struct ata_link *link)
1241{
1242 struct ata_port *ap = link->ap;
1243 struct ahci_port_priv *pp = ap->private_data;
1244 struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
1245
1246 /* init activity stats, setup timer */
1247 emp->saved_activity = emp->activity = 0;
1248 setup_timer(&emp->timer, ahci_sw_activity_blink, (unsigned long)link);
1249
1250 /* check our blink policy and set flag for link if it's enabled */
1251 if (emp->blink_policy)
1252 link->flags |= ATA_LFLAG_SW_ACTIVITY;
1253}
1254
1255static int ahci_reset_em(struct ata_host *host)
1256{
1257 void __iomem *mmio = host->iomap[AHCI_PCI_BAR];
1258 u32 em_ctl;
1259
1260 em_ctl = readl(mmio + HOST_EM_CTL);
1261 if ((em_ctl & EM_CTL_TM) || (em_ctl & EM_CTL_RST))
1262 return -EINVAL;
1263
1264 writel(em_ctl | EM_CTL_RST, mmio + HOST_EM_CTL);
1265 return 0;
1266}
1267
1268static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
1269 ssize_t size)
1270{
1271 struct ahci_host_priv *hpriv = ap->host->private_data;
1272 struct ahci_port_priv *pp = ap->private_data;
1273 void __iomem *mmio = ap->host->iomap[AHCI_PCI_BAR];
1274 u32 em_ctl;
1275 u32 message[] = {0, 0};
1276 unsigned int flags;
1277 int pmp;
1278 struct ahci_em_priv *emp;
1279
1280 /* get the slot number from the message */
1281 pmp = (state & 0x0000ff00) >> 8;
1282 if (pmp < MAX_SLOTS)
1283 emp = &pp->em_priv[pmp];
1284 else
1285 return -EINVAL;
1286
1287 spin_lock_irqsave(ap->lock, flags);
1288
1289 /*
1290 * if we are still busy transmitting a previous message,
1291 * do not allow
1292 */
1293 em_ctl = readl(mmio + HOST_EM_CTL);
1294 if (em_ctl & EM_CTL_TM) {
1295 spin_unlock_irqrestore(ap->lock, flags);
1296 return -EINVAL;
1297 }
1298
1299 /*
1300 * create message header - this is all zero except for
1301 * the message size, which is 4 bytes.
1302 */
1303 message[0] |= (4 << 8);
1304
1305 /* ignore 0:4 of byte zero, fill in port info yourself */
1306 message[1] = ((state & 0xfffffff0) | ap->port_no);
1307
1308 /* write message to EM_LOC */
1309 writel(message[0], mmio + hpriv->em_loc);
1310 writel(message[1], mmio + hpriv->em_loc+4);
1311
1312 /* save off new led state for port/slot */
1313 emp->led_state = message[1];
1314
1315 /*
1316 * tell hardware to transmit the message
1317 */
1318 writel(em_ctl | EM_CTL_TM, mmio + HOST_EM_CTL);
1319
1320 spin_unlock_irqrestore(ap->lock, flags);
1321 return size;
1322}
1323
1324static ssize_t ahci_led_show(struct ata_port *ap, char *buf)
1325{
1326 struct ahci_port_priv *pp = ap->private_data;
1327 struct ata_link *link;
1328 struct ahci_em_priv *emp;
1329 int rc = 0;
1330
1331 ata_port_for_each_link(link, ap) {
1332 emp = &pp->em_priv[link->pmp];
1333 rc += sprintf(buf, "%lx\n", emp->led_state);
1334 }
1335 return rc;
1336}
1337
1338static ssize_t ahci_led_store(struct ata_port *ap, const char *buf,
1339 size_t size)
1340{
1341 int state;
1342 int pmp;
1343 struct ahci_port_priv *pp = ap->private_data;
1344 struct ahci_em_priv *emp;
1345
1346 state = simple_strtoul(buf, NULL, 0);
1347
1348 /* get the slot number from the message */
1349 pmp = (state & 0x0000ff00) >> 8;
1350 if (pmp < MAX_SLOTS)
1351 emp = &pp->em_priv[pmp];
1352 else
1353 return -EINVAL;
1354
1355 /* mask off the activity bits if we are in sw_activity
1356 * mode, user should turn off sw_activity before setting
1357 * activity led through em_message
1358 */
1359 if (emp->blink_policy)
1360 state &= 0xfff8ffff;
1361
1362 return ahci_transmit_led_message(ap, state, size);
1363}
1364
1365static ssize_t ahci_activity_store(struct ata_device *dev, enum sw_activity val)
1366{
1367 struct ata_link *link = dev->link;
1368 struct ata_port *ap = link->ap;
1369 struct ahci_port_priv *pp = ap->private_data;
1370 struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
1371 u32 port_led_state = emp->led_state;
1372
1373 /* save the desired Activity LED behavior */
1374 if (val == OFF) {
1375 /* clear LFLAG */
1376 link->flags &= ~(ATA_LFLAG_SW_ACTIVITY);
1377
1378 /* set the LED to OFF */
1379 port_led_state &= 0xfff80000;
1380 port_led_state |= (ap->port_no | (link->pmp << 8));
1381 ahci_transmit_led_message(ap, port_led_state, 4);
1382 } else {
1383 link->flags |= ATA_LFLAG_SW_ACTIVITY;
1384 if (val == BLINK_OFF) {
1385 /* set LED to ON for idle */
1386 port_led_state &= 0xfff80000;
1387 port_led_state |= (ap->port_no | (link->pmp << 8));
1388 port_led_state |= 0x00010000; /* check this */
1389 ahci_transmit_led_message(ap, port_led_state, 4);
1390 }
1391 }
1392 emp->blink_policy = val;
1393 return 0;
1394}
1395
1396static ssize_t ahci_activity_show(struct ata_device *dev, char *buf)
1397{
1398 struct ata_link *link = dev->link;
1399 struct ata_port *ap = link->ap;
1400 struct ahci_port_priv *pp = ap->private_data;
1401 struct ahci_em_priv *emp = &pp->em_priv[link->pmp];
1402
1403 /* display the saved value of activity behavior for this
1404 * disk.
1405 */
1406 return sprintf(buf, "%d\n", emp->blink_policy);
1407}
1408
1119static void ahci_port_init(struct pci_dev *pdev, struct ata_port *ap, 1409static void ahci_port_init(struct pci_dev *pdev, struct ata_port *ap,
1120 int port_no, void __iomem *mmio, 1410 int port_no, void __iomem *mmio,
1121 void __iomem *port_mmio) 1411 void __iomem *port_mmio)
@@ -1846,7 +2136,8 @@ static unsigned int ahci_qc_issue(struct ata_queued_cmd *qc)
1846 if (qc->tf.protocol == ATA_PROT_NCQ) 2136 if (qc->tf.protocol == ATA_PROT_NCQ)
1847 writel(1 << qc->tag, port_mmio + PORT_SCR_ACT); 2137 writel(1 << qc->tag, port_mmio + PORT_SCR_ACT);
1848 writel(1 << qc->tag, port_mmio + PORT_CMD_ISSUE); 2138 writel(1 << qc->tag, port_mmio + PORT_CMD_ISSUE);
1849 readl(port_mmio + PORT_CMD_ISSUE); /* flush */ 2139
2140 ahci_sw_activity(qc->dev->link);
1850 2141
1851 return 0; 2142 return 0;
1852} 2143}
@@ -2154,7 +2445,8 @@ static void ahci_print_info(struct ata_host *host)
2154 dev_printk(KERN_INFO, &pdev->dev, 2445 dev_printk(KERN_INFO, &pdev->dev,
2155 "flags: " 2446 "flags: "
2156 "%s%s%s%s%s%s%s" 2447 "%s%s%s%s%s%s%s"
2157 "%s%s%s%s%s%s%s\n" 2448 "%s%s%s%s%s%s%s"
2449 "%s\n"
2158 , 2450 ,
2159 2451
2160 cap & (1 << 31) ? "64bit " : "", 2452 cap & (1 << 31) ? "64bit " : "",
@@ -2171,7 +2463,8 @@ static void ahci_print_info(struct ata_host *host)
2171 cap & (1 << 17) ? "pmp " : "", 2463 cap & (1 << 17) ? "pmp " : "",
2172 cap & (1 << 15) ? "pio " : "", 2464 cap & (1 << 15) ? "pio " : "",
2173 cap & (1 << 14) ? "slum " : "", 2465 cap & (1 << 14) ? "slum " : "",
2174 cap & (1 << 13) ? "part " : "" 2466 cap & (1 << 13) ? "part " : "",
2467 cap & (1 << 6) ? "ems ": ""
2175 ); 2468 );
2176} 2469}
2177 2470
@@ -2291,6 +2584,24 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2291 if (hpriv->cap & HOST_CAP_PMP) 2584 if (hpriv->cap & HOST_CAP_PMP)
2292 pi.flags |= ATA_FLAG_PMP; 2585 pi.flags |= ATA_FLAG_PMP;
2293 2586
2587 if (ahci_em_messages && (hpriv->cap & HOST_CAP_EMS)) {
2588 u8 messages;
2589 void __iomem *mmio = pcim_iomap_table(pdev)[AHCI_PCI_BAR];
2590 u32 em_loc = readl(mmio + HOST_EM_LOC);
2591 u32 em_ctl = readl(mmio + HOST_EM_CTL);
2592
2593 messages = (em_ctl & 0x000f0000) >> 16;
2594
2595 /* we only support LED message type right now */
2596 if ((messages & 0x01) && (ahci_em_messages == 1)) {
2597 /* store em_loc */
2598 hpriv->em_loc = ((em_loc >> 16) * 4);
2599 pi.flags |= ATA_FLAG_EM;
2600 if (!(em_ctl & EM_CTL_ALHD))
2601 pi.flags |= ATA_FLAG_SW_ACTIVITY;
2602 }
2603 }
2604
2294 /* CAP.NP sometimes indicate the index of the last enabled 2605 /* CAP.NP sometimes indicate the index of the last enabled
2295 * port, at other times, that of the last possible port, so 2606 * port, at other times, that of the last possible port, so
2296 * determining the maximum port number requires looking at 2607 * determining the maximum port number requires looking at
@@ -2304,6 +2615,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2304 host->iomap = pcim_iomap_table(pdev); 2615 host->iomap = pcim_iomap_table(pdev);
2305 host->private_data = hpriv; 2616 host->private_data = hpriv;
2306 2617
2618 if (pi.flags & ATA_FLAG_EM)
2619 ahci_reset_em(host);
2620
2307 for (i = 0; i < host->n_ports; i++) { 2621 for (i = 0; i < host->n_ports; i++) {
2308 struct ata_port *ap = host->ports[i]; 2622 struct ata_port *ap = host->ports[i];
2309 2623
@@ -2314,6 +2628,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2314 /* set initial link pm policy */ 2628 /* set initial link pm policy */
2315 ap->pm_policy = NOT_AVAILABLE; 2629 ap->pm_policy = NOT_AVAILABLE;
2316 2630
2631 /* set enclosure management message type */
2632 if (ap->flags & ATA_FLAG_EM)
2633 ap->em_message_type = ahci_em_messages;
2634
2635
2317 /* disabled/not-implemented port */ 2636 /* disabled/not-implemented port */
2318 if (!(hpriv->port_map & (1 << i))) 2637 if (!(hpriv->port_map & (1 << i)))
2319 ap->ops = &ata_dummy_port_ops; 2638 ap->ops = &ata_dummy_port_ops;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 303fc0d2b978..9bef1a84fe3f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -54,7 +54,6 @@
54#include <linux/completion.h> 54#include <linux/completion.h>
55#include <linux/suspend.h> 55#include <linux/suspend.h>
56#include <linux/workqueue.h> 56#include <linux/workqueue.h>
57#include <linux/jiffies.h>
58#include <linux/scatterlist.h> 57#include <linux/scatterlist.h>
59#include <linux/io.h> 58#include <linux/io.h>
60#include <scsi/scsi.h> 59#include <scsi/scsi.h>
@@ -145,7 +144,7 @@ static int libata_dma_mask = ATA_DMA_MASK_ATA|ATA_DMA_MASK_ATAPI|ATA_DMA_MASK_CF
145module_param_named(dma, libata_dma_mask, int, 0444); 144module_param_named(dma, libata_dma_mask, int, 0444);
146MODULE_PARM_DESC(dma, "DMA enable/disable (0x1==ATA, 0x2==ATAPI, 0x4==CF)"); 145MODULE_PARM_DESC(dma, "DMA enable/disable (0x1==ATA, 0x2==ATAPI, 0x4==CF)");
147 146
148static int ata_probe_timeout = ATA_TMOUT_INTERNAL / HZ; 147static int ata_probe_timeout;
149module_param(ata_probe_timeout, int, 0444); 148module_param(ata_probe_timeout, int, 0444);
150MODULE_PARM_DESC(ata_probe_timeout, "Set ATA probing timeout (seconds)"); 149MODULE_PARM_DESC(ata_probe_timeout, "Set ATA probing timeout (seconds)");
151 150
@@ -1533,7 +1532,7 @@ unsigned long ata_id_xfermask(const u16 *id)
1533 * @ap: The ata_port to queue port_task for 1532 * @ap: The ata_port to queue port_task for
1534 * @fn: workqueue function to be scheduled 1533 * @fn: workqueue function to be scheduled
1535 * @data: data for @fn to use 1534 * @data: data for @fn to use
1536 * @delay: delay time for workqueue function 1535 * @delay: delay time in msecs for workqueue function
1537 * 1536 *
1538 * Schedule @fn(@data) for execution after @delay jiffies using 1537 * Schedule @fn(@data) for execution after @delay jiffies using
1539 * port_task. There is one port_task per port and it's the 1538 * port_task. There is one port_task per port and it's the
@@ -1552,7 +1551,7 @@ void ata_pio_queue_task(struct ata_port *ap, void *data, unsigned long delay)
1552 ap->port_task_data = data; 1551 ap->port_task_data = data;
1553 1552
1554 /* may fail if ata_port_flush_task() in progress */ 1553 /* may fail if ata_port_flush_task() in progress */
1555 queue_delayed_work(ata_wq, &ap->port_task, delay); 1554 queue_delayed_work(ata_wq, &ap->port_task, msecs_to_jiffies(delay));
1556} 1555}
1557 1556
1558/** 1557/**
@@ -1612,6 +1611,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
1612 struct ata_link *link = dev->link; 1611 struct ata_link *link = dev->link;
1613 struct ata_port *ap = link->ap; 1612 struct ata_port *ap = link->ap;
1614 u8 command = tf->command; 1613 u8 command = tf->command;
1614 int auto_timeout = 0;
1615 struct ata_queued_cmd *qc; 1615 struct ata_queued_cmd *qc;
1616 unsigned int tag, preempted_tag; 1616 unsigned int tag, preempted_tag;
1617 u32 preempted_sactive, preempted_qc_active; 1617 u32 preempted_sactive, preempted_qc_active;
@@ -1684,8 +1684,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
1684 1684
1685 spin_unlock_irqrestore(ap->lock, flags); 1685 spin_unlock_irqrestore(ap->lock, flags);
1686 1686
1687 if (!timeout) 1687 if (!timeout) {
1688 timeout = ata_probe_timeout * 1000 / HZ; 1688 if (ata_probe_timeout)
1689 timeout = ata_probe_timeout * 1000;
1690 else {
1691 timeout = ata_internal_cmd_timeout(dev, command);
1692 auto_timeout = 1;
1693 }
1694 }
1689 1695
1690 rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout)); 1696 rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout));
1691 1697
@@ -1761,6 +1767,9 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
1761 1767
1762 spin_unlock_irqrestore(ap->lock, flags); 1768 spin_unlock_irqrestore(ap->lock, flags);
1763 1769
1770 if ((err_mask & AC_ERR_TIMEOUT) && auto_timeout)
1771 ata_internal_cmd_timed_out(dev, command);
1772
1764 return err_mask; 1773 return err_mask;
1765} 1774}
1766 1775
@@ -3319,7 +3328,7 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline,
3319 int (*check_ready)(struct ata_link *link)) 3328 int (*check_ready)(struct ata_link *link))
3320{ 3329{
3321 unsigned long start = jiffies; 3330 unsigned long start = jiffies;
3322 unsigned long nodev_deadline = start + ATA_TMOUT_FF_WAIT; 3331 unsigned long nodev_deadline = ata_deadline(start, ATA_TMOUT_FF_WAIT);
3323 int warned = 0; 3332 int warned = 0;
3324 3333
3325 if (time_after(nodev_deadline, deadline)) 3334 if (time_after(nodev_deadline, deadline))
@@ -3387,7 +3396,7 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline,
3387int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, 3396int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
3388 int (*check_ready)(struct ata_link *link)) 3397 int (*check_ready)(struct ata_link *link))
3389{ 3398{
3390 msleep(ATA_WAIT_AFTER_RESET_MSECS); 3399 msleep(ATA_WAIT_AFTER_RESET);
3391 3400
3392 return ata_wait_ready(link, deadline, check_ready); 3401 return ata_wait_ready(link, deadline, check_ready);
3393} 3402}
@@ -3417,13 +3426,13 @@ int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
3417int sata_link_debounce(struct ata_link *link, const unsigned long *params, 3426int sata_link_debounce(struct ata_link *link, const unsigned long *params,
3418 unsigned long deadline) 3427 unsigned long deadline)
3419{ 3428{
3420 unsigned long interval_msec = params[0]; 3429 unsigned long interval = params[0];
3421 unsigned long duration = msecs_to_jiffies(params[1]); 3430 unsigned long duration = params[1];
3422 unsigned long last_jiffies, t; 3431 unsigned long last_jiffies, t;
3423 u32 last, cur; 3432 u32 last, cur;
3424 int rc; 3433 int rc;
3425 3434
3426 t = jiffies + msecs_to_jiffies(params[2]); 3435 t = ata_deadline(jiffies, params[2]);
3427 if (time_before(t, deadline)) 3436 if (time_before(t, deadline))
3428 deadline = t; 3437 deadline = t;
3429 3438
@@ -3435,7 +3444,7 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
3435 last_jiffies = jiffies; 3444 last_jiffies = jiffies;
3436 3445
3437 while (1) { 3446 while (1) {
3438 msleep(interval_msec); 3447 msleep(interval);
3439 if ((rc = sata_scr_read(link, SCR_STATUS, &cur))) 3448 if ((rc = sata_scr_read(link, SCR_STATUS, &cur)))
3440 return rc; 3449 return rc;
3441 cur &= 0xf; 3450 cur &= 0xf;
@@ -3444,7 +3453,8 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
3444 if (cur == last) { 3453 if (cur == last) {
3445 if (cur == 1 && time_before(jiffies, deadline)) 3454 if (cur == 1 && time_before(jiffies, deadline))
3446 continue; 3455 continue;
3447 if (time_after(jiffies, last_jiffies + duration)) 3456 if (time_after(jiffies,
3457 ata_deadline(last_jiffies, duration)))
3448 return 0; 3458 return 0;
3449 continue; 3459 continue;
3450 } 3460 }
@@ -3636,7 +3646,8 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
3636 if (check_ready) { 3646 if (check_ready) {
3637 unsigned long pmp_deadline; 3647 unsigned long pmp_deadline;
3638 3648
3639 pmp_deadline = jiffies + ATA_TMOUT_PMP_SRST_WAIT; 3649 pmp_deadline = ata_deadline(jiffies,
3650 ATA_TMOUT_PMP_SRST_WAIT);
3640 if (time_after(pmp_deadline, deadline)) 3651 if (time_after(pmp_deadline, deadline))
3641 pmp_deadline = deadline; 3652 pmp_deadline = deadline;
3642 ata_wait_ready(link, pmp_deadline, check_ready); 3653 ata_wait_ready(link, pmp_deadline, check_ready);
@@ -6073,8 +6084,6 @@ static void __init ata_parse_force_param(void)
6073 6084
6074static int __init ata_init(void) 6085static int __init ata_init(void)
6075{ 6086{
6076 ata_probe_timeout *= HZ;
6077
6078 ata_parse_force_param(); 6087 ata_parse_force_param();
6079 6088
6080 ata_wq = create_workqueue("ata"); 6089 ata_wq = create_workqueue("ata");
@@ -6127,8 +6136,8 @@ int ata_ratelimit(void)
6127 * @reg: IO-mapped register 6136 * @reg: IO-mapped register
6128 * @mask: Mask to apply to read register value 6137 * @mask: Mask to apply to read register value
6129 * @val: Wait condition 6138 * @val: Wait condition
6130 * @interval_msec: polling interval in milliseconds 6139 * @interval: polling interval in milliseconds
6131 * @timeout_msec: timeout in milliseconds 6140 * @timeout: timeout in milliseconds
6132 * 6141 *
6133 * Waiting for some bits of register to change is a common 6142 * Waiting for some bits of register to change is a common
6134 * operation for ATA controllers. This function reads 32bit LE 6143 * operation for ATA controllers. This function reads 32bit LE
@@ -6146,10 +6155,9 @@ int ata_ratelimit(void)
6146 * The final register value. 6155 * The final register value.
6147 */ 6156 */
6148u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, 6157u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
6149 unsigned long interval_msec, 6158 unsigned long interval, unsigned long timeout)
6150 unsigned long timeout_msec)
6151{ 6159{
6152 unsigned long timeout; 6160 unsigned long deadline;
6153 u32 tmp; 6161 u32 tmp;
6154 6162
6155 tmp = ioread32(reg); 6163 tmp = ioread32(reg);
@@ -6158,10 +6166,10 @@ u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
6158 * preceding writes reach the controller before starting to 6166 * preceding writes reach the controller before starting to
6159 * eat away the timeout. 6167 * eat away the timeout.
6160 */ 6168 */
6161 timeout = jiffies + (timeout_msec * HZ) / 1000; 6169 deadline = ata_deadline(jiffies, timeout);
6162 6170
6163 while ((tmp & mask) == val && time_before(jiffies, timeout)) { 6171 while ((tmp & mask) == val && time_before(jiffies, deadline)) {
6164 msleep(interval_msec); 6172 msleep(interval);
6165 tmp = ioread32(reg); 6173 tmp = ioread32(reg);
6166 } 6174 }
6167 6175
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 7894d83ea1eb..58bdc538d229 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -66,15 +66,19 @@ enum {
66 ATA_ECAT_DUBIOUS_TOUT_HSM = 6, 66 ATA_ECAT_DUBIOUS_TOUT_HSM = 6,
67 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 67 ATA_ECAT_DUBIOUS_UNK_DEV = 7,
68 ATA_ECAT_NR = 8, 68 ATA_ECAT_NR = 8,
69};
70 69
71/* Waiting in ->prereset can never be reliable. It's sometimes nice 70 ATA_EH_CMD_DFL_TIMEOUT = 5000,
72 * to wait there but it can't be depended upon; otherwise, we wouldn't 71
73 * be resetting. Just give it enough time for most drives to spin up. 72 /* always put at least this amount of time between resets */
74 */ 73 ATA_EH_RESET_COOL_DOWN = 5000,
75enum { 74
76 ATA_EH_PRERESET_TIMEOUT = 10 * HZ, 75 /* Waiting in ->prereset can never be reliable. It's
77 ATA_EH_FASTDRAIN_INTERVAL = 3 * HZ, 76 * sometimes nice to wait there but it can't be depended upon;
77 * otherwise, we wouldn't be resetting. Just give it enough
78 * time for most drives to spin up.
79 */
80 ATA_EH_PRERESET_TIMEOUT = 10000,
81 ATA_EH_FASTDRAIN_INTERVAL = 3000,
78}; 82};
79 83
80/* The following table determines how we sequence resets. Each entry 84/* The following table determines how we sequence resets. Each entry
@@ -84,12 +88,59 @@ enum {
84 * are mostly for error handling, hotplug and retarded devices. 88 * are mostly for error handling, hotplug and retarded devices.
85 */ 89 */
86static const unsigned long ata_eh_reset_timeouts[] = { 90static const unsigned long ata_eh_reset_timeouts[] = {
87 10 * HZ, /* most drives spin up by 10sec */ 91 10000, /* most drives spin up by 10sec */
88 10 * HZ, /* > 99% working drives spin up before 20sec */ 92 10000, /* > 99% working drives spin up before 20sec */
89 35 * HZ, /* give > 30 secs of idleness for retarded devices */ 93 35000, /* give > 30 secs of idleness for retarded devices */
90 5 * HZ, /* and sweet one last chance */ 94 5000, /* and sweet one last chance */
91 /* > 1 min has elapsed, give up */ 95 ULONG_MAX, /* > 1 min has elapsed, give up */
96};
97
98static const unsigned long ata_eh_identify_timeouts[] = {
99 5000, /* covers > 99% of successes and not too boring on failures */
100 10000, /* combined time till here is enough even for media access */
101 30000, /* for true idiots */
102 ULONG_MAX,
103};
104
105static const unsigned long ata_eh_other_timeouts[] = {
106 5000, /* same rationale as identify timeout */
107 10000, /* ditto */
108 /* but no merciful 30sec for other commands, it just isn't worth it */
109 ULONG_MAX,
110};
111
112struct ata_eh_cmd_timeout_ent {
113 const u8 *commands;
114 const unsigned long *timeouts;
115};
116
117/* The following table determines timeouts to use for EH internal
118 * commands. Each table entry is a command class and matches the
119 * commands the entry applies to and the timeout table to use.
120 *
121 * On the retry after a command timed out, the next timeout value from
122 * the table is used. If the table doesn't contain further entries,
123 * the last value is used.
124 *
125 * ehc->cmd_timeout_idx keeps track of which timeout to use per
126 * command class, so if SET_FEATURES times out on the first try, the
127 * next try will use the second timeout value only for that class.
128 */
129#define CMDS(cmds...) (const u8 []){ cmds, 0 }
130static const struct ata_eh_cmd_timeout_ent
131ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
132 { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
133 .timeouts = ata_eh_identify_timeouts, },
134 { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
135 .timeouts = ata_eh_other_timeouts, },
136 { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
137 .timeouts = ata_eh_other_timeouts, },
138 { .commands = CMDS(ATA_CMD_SET_FEATURES),
139 .timeouts = ata_eh_other_timeouts, },
140 { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
141 .timeouts = ata_eh_other_timeouts, },
92}; 142};
143#undef CMDS
93 144
94static void __ata_port_freeze(struct ata_port *ap); 145static void __ata_port_freeze(struct ata_port *ap);
95#ifdef CONFIG_PM 146#ifdef CONFIG_PM
@@ -236,6 +287,73 @@ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
236 287
237#endif /* CONFIG_PCI */ 288#endif /* CONFIG_PCI */
238 289
290static int ata_lookup_timeout_table(u8 cmd)
291{
292 int i;
293
294 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
295 const u8 *cur;
296
297 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
298 if (*cur == cmd)
299 return i;
300 }
301
302 return -1;
303}
304
305/**
306 * ata_internal_cmd_timeout - determine timeout for an internal command
307 * @dev: target device
308 * @cmd: internal command to be issued
309 *
310 * Determine timeout for internal command @cmd for @dev.
311 *
312 * LOCKING:
313 * EH context.
314 *
315 * RETURNS:
316 * Determined timeout.
317 */
318unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
319{
320 struct ata_eh_context *ehc = &dev->link->eh_context;
321 int ent = ata_lookup_timeout_table(cmd);
322 int idx;
323
324 if (ent < 0)
325 return ATA_EH_CMD_DFL_TIMEOUT;
326
327 idx = ehc->cmd_timeout_idx[dev->devno][ent];
328 return ata_eh_cmd_timeout_table[ent].timeouts[idx];
329}
330
331/**
332 * ata_internal_cmd_timed_out - notification for internal command timeout
333 * @dev: target device
334 * @cmd: internal command which timed out
335 *
336 * Notify EH that internal command @cmd for @dev timed out. This
337 * function should be called only for commands whose timeouts are
338 * determined using ata_internal_cmd_timeout().
339 *
340 * LOCKING:
341 * EH context.
342 */
343void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
344{
345 struct ata_eh_context *ehc = &dev->link->eh_context;
346 int ent = ata_lookup_timeout_table(cmd);
347 int idx;
348
349 if (ent < 0)
350 return;
351
352 idx = ehc->cmd_timeout_idx[dev->devno][ent];
353 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
354 ehc->cmd_timeout_idx[dev->devno][ent]++;
355}
356
239static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 357static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
240 unsigned int err_mask) 358 unsigned int err_mask)
241{ 359{
@@ -486,6 +604,9 @@ void ata_scsi_error(struct Scsi_Host *host)
486 if (ata_ncq_enabled(dev)) 604 if (ata_ncq_enabled(dev))
487 ehc->saved_ncq_enabled |= 1 << devno; 605 ehc->saved_ncq_enabled |= 1 << devno;
488 } 606 }
607
608 /* set last reset timestamp to some time in the past */
609 ehc->last_reset = jiffies - 60 * HZ;
489 } 610 }
490 611
491 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 612 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
@@ -641,7 +762,7 @@ void ata_eh_fastdrain_timerfn(unsigned long arg)
641 /* some qcs have finished, give it another chance */ 762 /* some qcs have finished, give it another chance */
642 ap->fastdrain_cnt = cnt; 763 ap->fastdrain_cnt = cnt;
643 ap->fastdrain_timer.expires = 764 ap->fastdrain_timer.expires =
644 jiffies + ATA_EH_FASTDRAIN_INTERVAL; 765 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
645 add_timer(&ap->fastdrain_timer); 766 add_timer(&ap->fastdrain_timer);
646 } 767 }
647 768
@@ -681,7 +802,8 @@ static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
681 802
682 /* activate fast drain */ 803 /* activate fast drain */
683 ap->fastdrain_cnt = cnt; 804 ap->fastdrain_cnt = cnt;
684 ap->fastdrain_timer.expires = jiffies + ATA_EH_FASTDRAIN_INTERVAL; 805 ap->fastdrain_timer.expires =
806 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
685 add_timer(&ap->fastdrain_timer); 807 add_timer(&ap->fastdrain_timer);
686} 808}
687 809
@@ -1238,6 +1360,7 @@ static int ata_eh_read_log_10h(struct ata_device *dev,
1238 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1360 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
1239 * @dev: device to perform REQUEST_SENSE to 1361 * @dev: device to perform REQUEST_SENSE to
1240 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1362 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
1363 * @dfl_sense_key: default sense key to use
1241 * 1364 *
1242 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1365 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
1243 * SENSE. This function is EH helper. 1366 * SENSE. This function is EH helper.
@@ -1248,13 +1371,13 @@ static int ata_eh_read_log_10h(struct ata_device *dev,
1248 * RETURNS: 1371 * RETURNS:
1249 * 0 on success, AC_ERR_* mask on failure 1372 * 0 on success, AC_ERR_* mask on failure
1250 */ 1373 */
1251static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) 1374static unsigned int atapi_eh_request_sense(struct ata_device *dev,
1375 u8 *sense_buf, u8 dfl_sense_key)
1252{ 1376{
1253 struct ata_device *dev = qc->dev; 1377 u8 cdb[ATAPI_CDB_LEN] =
1254 unsigned char *sense_buf = qc->scsicmd->sense_buffer; 1378 { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
1255 struct ata_port *ap = dev->link->ap; 1379 struct ata_port *ap = dev->link->ap;
1256 struct ata_taskfile tf; 1380 struct ata_taskfile tf;
1257 u8 cdb[ATAPI_CDB_LEN];
1258 1381
1259 DPRINTK("ATAPI request sense\n"); 1382 DPRINTK("ATAPI request sense\n");
1260 1383
@@ -1265,15 +1388,11 @@ static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc)
1265 * for the case where they are -not- overwritten 1388 * for the case where they are -not- overwritten
1266 */ 1389 */
1267 sense_buf[0] = 0x70; 1390 sense_buf[0] = 0x70;
1268 sense_buf[2] = qc->result_tf.feature >> 4; 1391 sense_buf[2] = dfl_sense_key;
1269 1392
1270 /* some devices time out if garbage left in tf */ 1393 /* some devices time out if garbage left in tf */
1271 ata_tf_init(dev, &tf); 1394 ata_tf_init(dev, &tf);
1272 1395
1273 memset(cdb, 0, ATAPI_CDB_LEN);
1274 cdb[0] = REQUEST_SENSE;
1275 cdb[4] = SCSI_SENSE_BUFFERSIZE;
1276
1277 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1396 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1278 tf.command = ATA_CMD_PACKET; 1397 tf.command = ATA_CMD_PACKET;
1279 1398
@@ -1445,7 +1564,9 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
1445 1564
1446 case ATA_DEV_ATAPI: 1565 case ATA_DEV_ATAPI:
1447 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1566 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
1448 tmp = atapi_eh_request_sense(qc); 1567 tmp = atapi_eh_request_sense(qc->dev,
1568 qc->scsicmd->sense_buffer,
1569 qc->result_tf.feature >> 4);
1449 if (!tmp) { 1570 if (!tmp) {
1450 /* ATA_QCFLAG_SENSE_VALID is used to 1571 /* ATA_QCFLAG_SENSE_VALID is used to
1451 * tell atapi_qc_complete() that sense 1572 * tell atapi_qc_complete() that sense
@@ -2071,13 +2192,12 @@ int ata_eh_reset(struct ata_link *link, int classify,
2071 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2192 ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
2072 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2193 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
2073{ 2194{
2074 const int max_tries = ARRAY_SIZE(ata_eh_reset_timeouts);
2075 struct ata_port *ap = link->ap; 2195 struct ata_port *ap = link->ap;
2076 struct ata_eh_context *ehc = &link->eh_context; 2196 struct ata_eh_context *ehc = &link->eh_context;
2077 unsigned int *classes = ehc->classes; 2197 unsigned int *classes = ehc->classes;
2078 unsigned int lflags = link->flags; 2198 unsigned int lflags = link->flags;
2079 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2199 int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
2080 int try = 0; 2200 int max_tries = 0, try = 0;
2081 struct ata_device *dev; 2201 struct ata_device *dev;
2082 unsigned long deadline, now; 2202 unsigned long deadline, now;
2083 ata_reset_fn_t reset; 2203 ata_reset_fn_t reset;
@@ -2088,11 +2208,20 @@ int ata_eh_reset(struct ata_link *link, int classify,
2088 /* 2208 /*
2089 * Prepare to reset 2209 * Prepare to reset
2090 */ 2210 */
2211 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
2212 max_tries++;
2213
2214 now = jiffies;
2215 deadline = ata_deadline(ehc->last_reset, ATA_EH_RESET_COOL_DOWN);
2216 if (time_before(now, deadline))
2217 schedule_timeout_uninterruptible(deadline - now);
2218
2091 spin_lock_irqsave(ap->lock, flags); 2219 spin_lock_irqsave(ap->lock, flags);
2092 ap->pflags |= ATA_PFLAG_RESETTING; 2220 ap->pflags |= ATA_PFLAG_RESETTING;
2093 spin_unlock_irqrestore(ap->lock, flags); 2221 spin_unlock_irqrestore(ap->lock, flags);
2094 2222
2095 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2223 ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
2224 ehc->last_reset = jiffies;
2096 2225
2097 ata_link_for_each_dev(dev, link) { 2226 ata_link_for_each_dev(dev, link) {
2098 /* If we issue an SRST then an ATA drive (not ATAPI) 2227 /* If we issue an SRST then an ATA drive (not ATAPI)
@@ -2125,7 +2254,8 @@ int ata_eh_reset(struct ata_link *link, int classify,
2125 } 2254 }
2126 2255
2127 if (prereset) { 2256 if (prereset) {
2128 rc = prereset(link, jiffies + ATA_EH_PRERESET_TIMEOUT); 2257 rc = prereset(link,
2258 ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT));
2129 if (rc) { 2259 if (rc) {
2130 if (rc == -ENOENT) { 2260 if (rc == -ENOENT) {
2131 ata_link_printk(link, KERN_DEBUG, 2261 ata_link_printk(link, KERN_DEBUG,
@@ -2157,10 +2287,11 @@ int ata_eh_reset(struct ata_link *link, int classify,
2157 /* 2287 /*
2158 * Perform reset 2288 * Perform reset
2159 */ 2289 */
2290 ehc->last_reset = jiffies;
2160 if (ata_is_host_link(link)) 2291 if (ata_is_host_link(link))
2161 ata_eh_freeze_port(ap); 2292 ata_eh_freeze_port(ap);
2162 2293
2163 deadline = jiffies + ata_eh_reset_timeouts[try++]; 2294 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);
2164 2295
2165 if (reset) { 2296 if (reset) {
2166 if (verbose) 2297 if (verbose)
@@ -2277,6 +2408,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
2277 2408
2278 /* reset successful, schedule revalidation */ 2409 /* reset successful, schedule revalidation */
2279 ata_eh_done(link, NULL, ATA_EH_RESET); 2410 ata_eh_done(link, NULL, ATA_EH_RESET);
2411 ehc->last_reset = jiffies;
2280 ehc->i.action |= ATA_EH_REVALIDATE; 2412 ehc->i.action |= ATA_EH_REVALIDATE;
2281 2413
2282 rc = 0; 2414 rc = 0;
@@ -2303,9 +2435,9 @@ int ata_eh_reset(struct ata_link *link, int classify,
2303 if (time_before(now, deadline)) { 2435 if (time_before(now, deadline)) {
2304 unsigned long delta = deadline - now; 2436 unsigned long delta = deadline - now;
2305 2437
2306 ata_link_printk(link, KERN_WARNING, "reset failed " 2438 ata_link_printk(link, KERN_WARNING,
2307 "(errno=%d), retrying in %u secs\n", 2439 "reset failed (errno=%d), retrying in %u secs\n",
2308 rc, (jiffies_to_msecs(delta) + 999) / 1000); 2440 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
2309 2441
2310 while (delta) 2442 while (delta)
2311 delta = schedule_timeout_uninterruptible(delta); 2443 delta = schedule_timeout_uninterruptible(delta);
@@ -2583,8 +2715,11 @@ static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
2583 ata_eh_detach_dev(dev); 2715 ata_eh_detach_dev(dev);
2584 2716
2585 /* schedule probe if necessary */ 2717 /* schedule probe if necessary */
2586 if (ata_eh_schedule_probe(dev)) 2718 if (ata_eh_schedule_probe(dev)) {
2587 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2719 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
2720 memset(ehc->cmd_timeout_idx[dev->devno], 0,
2721 sizeof(ehc->cmd_timeout_idx[dev->devno]));
2722 }
2588 2723
2589 return 1; 2724 return 1;
2590 } else { 2725 } else {
@@ -2622,7 +2757,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
2622{ 2757{
2623 struct ata_link *link; 2758 struct ata_link *link;
2624 struct ata_device *dev; 2759 struct ata_device *dev;
2625 int nr_failed_devs, nr_disabled_devs; 2760 int nr_failed_devs;
2626 int rc; 2761 int rc;
2627 unsigned long flags; 2762 unsigned long flags;
2628 2763
@@ -2665,7 +2800,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
2665 retry: 2800 retry:
2666 rc = 0; 2801 rc = 0;
2667 nr_failed_devs = 0; 2802 nr_failed_devs = 0;
2668 nr_disabled_devs = 0;
2669 2803
2670 /* if UNLOADING, finish immediately */ 2804 /* if UNLOADING, finish immediately */
2671 if (ap->pflags & ATA_PFLAG_UNLOADING) 2805 if (ap->pflags & ATA_PFLAG_UNLOADING)
@@ -2732,8 +2866,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
2732 2866
2733dev_fail: 2867dev_fail:
2734 nr_failed_devs++; 2868 nr_failed_devs++;
2735 if (ata_eh_handle_dev_fail(dev, rc)) 2869 ata_eh_handle_dev_fail(dev, rc);
2736 nr_disabled_devs++;
2737 2870
2738 if (ap->pflags & ATA_PFLAG_FROZEN) { 2871 if (ap->pflags & ATA_PFLAG_FROZEN) {
2739 /* PMP reset requires working host port. 2872 /* PMP reset requires working host port.
@@ -2745,18 +2878,8 @@ dev_fail:
2745 } 2878 }
2746 } 2879 }
2747 2880
2748 if (nr_failed_devs) { 2881 if (nr_failed_devs)
2749 if (nr_failed_devs != nr_disabled_devs) {
2750 ata_port_printk(ap, KERN_WARNING, "failed to recover "
2751 "some devices, retrying in 5 secs\n");
2752 ssleep(5);
2753 } else {
2754 /* no device left to recover, repeat fast */
2755 msleep(500);
2756 }
2757
2758 goto retry; 2882 goto retry;
2759 }
2760 2883
2761 out: 2884 out:
2762 if (rc && r_failed_link) 2885 if (rc && r_failed_link)
diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
index 7daf4c0f6216..b65db309c181 100644
--- a/drivers/ata/libata-pmp.c
+++ b/drivers/ata/libata-pmp.c
@@ -727,19 +727,12 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap,
727 } 727 }
728 728
729 if (tries) { 729 if (tries) {
730 int sleep = ehc->i.flags & ATA_EHI_DID_RESET;
731
732 /* consecutive revalidation failures? speed down */ 730 /* consecutive revalidation failures? speed down */
733 if (reval_failed) 731 if (reval_failed)
734 sata_down_spd_limit(link); 732 sata_down_spd_limit(link);
735 else 733 else
736 reval_failed = 1; 734 reval_failed = 1;
737 735
738 ata_dev_printk(dev, KERN_WARNING,
739 "retrying reset%s\n",
740 sleep ? " in 5 secs" : "");
741 if (sleep)
742 ssleep(5);
743 ehc->i.action |= ATA_EH_RESET; 736 ehc->i.action |= ATA_EH_RESET;
744 goto retry; 737 goto retry;
745 } else { 738 } else {
@@ -785,7 +778,8 @@ static int sata_pmp_eh_handle_disabled_links(struct ata_port *ap)
785 * SError.N working. 778 * SError.N working.
786 */ 779 */
787 sata_link_hardreset(link, sata_deb_timing_normal, 780 sata_link_hardreset(link, sata_deb_timing_normal,
788 jiffies + ATA_TMOUT_INTERNAL_QUICK, NULL, NULL); 781 ata_deadline(jiffies, ATA_TMOUT_INTERNAL_QUICK),
782 NULL, NULL);
789 783
790 /* unconditionally clear SError.N */ 784 /* unconditionally clear SError.N */
791 rc = sata_scr_write(link, SCR_ERROR, SERR_PHYRDY_CHG); 785 rc = sata_scr_write(link, SCR_ERROR, SERR_PHYRDY_CHG);
@@ -990,10 +984,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
990 goto retry; 984 goto retry;
991 985
992 if (--pmp_tries) { 986 if (--pmp_tries) {
993 ata_port_printk(ap, KERN_WARNING,
994 "failed to recover PMP, retrying in 5 secs\n");
995 pmp_ehc->i.action |= ATA_EH_RESET; 987 pmp_ehc->i.action |= ATA_EH_RESET;
996 ssleep(5);
997 goto retry; 988 goto retry;
998 } 989 }
999 990
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 499ccc628d81..f3b4b15a8dc4 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -190,6 +190,85 @@ static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq)
190 scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); 190 scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq);
191} 191}
192 192
193static ssize_t
194ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr,
195 const char *buf, size_t count)
196{
197 struct Scsi_Host *shost = class_to_shost(dev);
198 struct ata_port *ap = ata_shost_to_port(shost);
199 if (ap->ops->em_store && (ap->flags & ATA_FLAG_EM))
200 return ap->ops->em_store(ap, buf, count);
201 return -EINVAL;
202}
203
204static ssize_t
205ata_scsi_em_message_show(struct device *dev, struct device_attribute *attr,
206 char *buf)
207{
208 struct Scsi_Host *shost = class_to_shost(dev);
209 struct ata_port *ap = ata_shost_to_port(shost);
210
211 if (ap->ops->em_show && (ap->flags & ATA_FLAG_EM))
212 return ap->ops->em_show(ap, buf);
213 return -EINVAL;
214}
215DEVICE_ATTR(em_message, S_IRUGO | S_IWUGO,
216 ata_scsi_em_message_show, ata_scsi_em_message_store);
217EXPORT_SYMBOL_GPL(dev_attr_em_message);
218
219static ssize_t
220ata_scsi_em_message_type_show(struct device *dev, struct device_attribute *attr,
221 char *buf)
222{
223 struct Scsi_Host *shost = class_to_shost(dev);
224 struct ata_port *ap = ata_shost_to_port(shost);
225
226 return snprintf(buf, 23, "%d\n", ap->em_message_type);
227}
228DEVICE_ATTR(em_message_type, S_IRUGO,
229 ata_scsi_em_message_type_show, NULL);
230EXPORT_SYMBOL_GPL(dev_attr_em_message_type);
231
232static ssize_t
233ata_scsi_activity_show(struct device *dev, struct device_attribute *attr,
234 char *buf)
235{
236 struct scsi_device *sdev = to_scsi_device(dev);
237 struct ata_port *ap = ata_shost_to_port(sdev->host);
238 struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
239
240 if (ap->ops->sw_activity_show && (ap->flags & ATA_FLAG_SW_ACTIVITY))
241 return ap->ops->sw_activity_show(atadev, buf);
242 return -EINVAL;
243}
244
245static ssize_t
246ata_scsi_activity_store(struct device *dev, struct device_attribute *attr,
247 const char *buf, size_t count)
248{
249 struct scsi_device *sdev = to_scsi_device(dev);
250 struct ata_port *ap = ata_shost_to_port(sdev->host);
251 struct ata_device *atadev = ata_scsi_find_dev(ap, sdev);
252 enum sw_activity val;
253 int rc;
254
255 if (ap->ops->sw_activity_store && (ap->flags & ATA_FLAG_SW_ACTIVITY)) {
256 val = simple_strtoul(buf, NULL, 0);
257 switch (val) {
258 case OFF: case BLINK_ON: case BLINK_OFF:
259 rc = ap->ops->sw_activity_store(atadev, val);
260 if (!rc)
261 return count;
262 else
263 return rc;
264 }
265 }
266 return -EINVAL;
267}
268DEVICE_ATTR(sw_activity, S_IWUGO | S_IRUGO, ata_scsi_activity_show,
269 ata_scsi_activity_store);
270EXPORT_SYMBOL_GPL(dev_attr_sw_activity);
271
193static void ata_scsi_invalid_field(struct scsi_cmnd *cmd, 272static void ata_scsi_invalid_field(struct scsi_cmnd *cmd,
194 void (*done)(struct scsi_cmnd *)) 273 void (*done)(struct scsi_cmnd *))
195{ 274{
@@ -1779,7 +1858,9 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
1779 const u8 pages[] = { 1858 const u8 pages[] = {
1780 0x00, /* page 0x00, this page */ 1859 0x00, /* page 0x00, this page */
1781 0x80, /* page 0x80, unit serial no page */ 1860 0x80, /* page 0x80, unit serial no page */
1782 0x83 /* page 0x83, device ident page */ 1861 0x83, /* page 0x83, device ident page */
1862 0x89, /* page 0x89, ata info page */
1863 0xb1, /* page 0xb1, block device characteristics page */
1783 }; 1864 };
1784 1865
1785 rbuf[3] = sizeof(pages); /* number of supported VPD pages */ 1866 rbuf[3] = sizeof(pages); /* number of supported VPD pages */
@@ -1900,6 +1981,19 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
1900 return 0; 1981 return 0;
1901} 1982}
1902 1983
1984static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf)
1985{
1986 rbuf[1] = 0xb1;
1987 rbuf[3] = 0x3c;
1988 if (ata_id_major_version(args->id) > 7) {
1989 rbuf[4] = args->id[217] >> 8;
1990 rbuf[5] = args->id[217];
1991 rbuf[7] = args->id[168] & 0xf;
1992 }
1993
1994 return 0;
1995}
1996
1903/** 1997/**
1904 * ata_scsiop_noop - Command handler that simply returns success. 1998 * ata_scsiop_noop - Command handler that simply returns success.
1905 * @args: device IDENTIFY data / SCSI command of interest. 1999 * @args: device IDENTIFY data / SCSI command of interest.
@@ -2921,6 +3015,9 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd,
2921 case 0x89: 3015 case 0x89:
2922 ata_scsi_rbuf_fill(&args, ata_scsiop_inq_89); 3016 ata_scsi_rbuf_fill(&args, ata_scsiop_inq_89);
2923 break; 3017 break;
3018 case 0xb1:
3019 ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b1);
3020 break;
2924 default: 3021 default:
2925 ata_scsi_invalid_field(cmd, done); 3022 ata_scsi_invalid_field(cmd, done);
2926 break; 3023 break;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index c0908c225483..304fdc6f1dc2 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -345,8 +345,8 @@ void ata_sff_dma_pause(struct ata_port *ap)
345/** 345/**
346 * ata_sff_busy_sleep - sleep until BSY clears, or timeout 346 * ata_sff_busy_sleep - sleep until BSY clears, or timeout
347 * @ap: port containing status register to be polled 347 * @ap: port containing status register to be polled
348 * @tmout_pat: impatience timeout 348 * @tmout_pat: impatience timeout in msecs
349 * @tmout: overall timeout 349 * @tmout: overall timeout in msecs
350 * 350 *
351 * Sleep until ATA Status register bit BSY clears, 351 * Sleep until ATA Status register bit BSY clears,
352 * or a timeout occurs. 352 * or a timeout occurs.
@@ -365,7 +365,7 @@ int ata_sff_busy_sleep(struct ata_port *ap,
365 365
366 status = ata_sff_busy_wait(ap, ATA_BUSY, 300); 366 status = ata_sff_busy_wait(ap, ATA_BUSY, 300);
367 timer_start = jiffies; 367 timer_start = jiffies;
368 timeout = timer_start + tmout_pat; 368 timeout = ata_deadline(timer_start, tmout_pat);
369 while (status != 0xff && (status & ATA_BUSY) && 369 while (status != 0xff && (status & ATA_BUSY) &&
370 time_before(jiffies, timeout)) { 370 time_before(jiffies, timeout)) {
371 msleep(50); 371 msleep(50);
@@ -377,7 +377,7 @@ int ata_sff_busy_sleep(struct ata_port *ap,
377 "port is slow to respond, please be patient " 377 "port is slow to respond, please be patient "
378 "(Status 0x%x)\n", status); 378 "(Status 0x%x)\n", status);
379 379
380 timeout = timer_start + tmout; 380 timeout = ata_deadline(timer_start, tmout);
381 while (status != 0xff && (status & ATA_BUSY) && 381 while (status != 0xff && (status & ATA_BUSY) &&
382 time_before(jiffies, timeout)) { 382 time_before(jiffies, timeout)) {
383 msleep(50); 383 msleep(50);
@@ -390,7 +390,7 @@ int ata_sff_busy_sleep(struct ata_port *ap,
390 if (status & ATA_BUSY) { 390 if (status & ATA_BUSY) {
391 ata_port_printk(ap, KERN_ERR, "port failed to respond " 391 ata_port_printk(ap, KERN_ERR, "port failed to respond "
392 "(%lu secs, Status 0x%x)\n", 392 "(%lu secs, Status 0x%x)\n",
393 tmout / HZ, status); 393 DIV_ROUND_UP(tmout, 1000), status);
394 return -EBUSY; 394 return -EBUSY;
395 } 395 }
396 396
@@ -1888,7 +1888,7 @@ int ata_sff_wait_after_reset(struct ata_link *link, unsigned int devmask,
1888 unsigned int dev1 = devmask & (1 << 1); 1888 unsigned int dev1 = devmask & (1 << 1);
1889 int rc, ret = 0; 1889 int rc, ret = 0;
1890 1890
1891 msleep(ATA_WAIT_AFTER_RESET_MSECS); 1891 msleep(ATA_WAIT_AFTER_RESET);
1892 1892
1893 /* always check readiness of the master device */ 1893 /* always check readiness of the master device */
1894 rc = ata_sff_wait_ready(link, deadline); 1894 rc = ata_sff_wait_ready(link, deadline);
@@ -2371,7 +2371,8 @@ void ata_bus_reset(struct ata_port *ap)
2371 2371
2372 /* issue bus reset */ 2372 /* issue bus reset */
2373 if (ap->flags & ATA_FLAG_SRST) { 2373 if (ap->flags & ATA_FLAG_SRST) {
2374 rc = ata_bus_softreset(ap, devmask, jiffies + 40 * HZ); 2374 rc = ata_bus_softreset(ap, devmask,
2375 ata_deadline(jiffies, 40000));
2375 if (rc && rc != -ENODEV) 2376 if (rc && rc != -ENODEV)
2376 goto err_out; 2377 goto err_out;
2377 } 2378 }
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 1cf803adbc95..f6f9c28ec7f8 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -151,6 +151,8 @@ extern void ata_scsi_dev_rescan(struct work_struct *work);
151extern int ata_bus_probe(struct ata_port *ap); 151extern int ata_bus_probe(struct ata_port *ap);
152 152
153/* libata-eh.c */ 153/* libata-eh.c */
154extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
155extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
154extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); 156extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
155extern void ata_scsi_error(struct Scsi_Host *host); 157extern void ata_scsi_error(struct Scsi_Host *host);
156extern void ata_port_wait_eh(struct ata_port *ap); 158extern void ata_port_wait_eh(struct ata_port *ap);
diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index 55516103626a..d3932901a3b3 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c
@@ -1011,7 +1011,7 @@ static void bfin_bus_post_reset(struct ata_port *ap, unsigned int devmask)
1011 void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr; 1011 void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
1012 unsigned int dev0 = devmask & (1 << 0); 1012 unsigned int dev0 = devmask & (1 << 0);
1013 unsigned int dev1 = devmask & (1 << 1); 1013 unsigned int dev1 = devmask & (1 << 1);
1014 unsigned long timeout; 1014 unsigned long deadline;
1015 1015
1016 /* if device 0 was found in ata_devchk, wait for its 1016 /* if device 0 was found in ata_devchk, wait for its
1017 * BSY bit to clear 1017 * BSY bit to clear
@@ -1022,7 +1022,7 @@ static void bfin_bus_post_reset(struct ata_port *ap, unsigned int devmask)
1022 /* if device 1 was found in ata_devchk, wait for 1022 /* if device 1 was found in ata_devchk, wait for
1023 * register access, then wait for BSY to clear 1023 * register access, then wait for BSY to clear
1024 */ 1024 */
1025 timeout = jiffies + ATA_TMOUT_BOOT; 1025 deadline = ata_deadline(jiffies, ATA_TMOUT_BOOT);
1026 while (dev1) { 1026 while (dev1) {
1027 u8 nsect, lbal; 1027 u8 nsect, lbal;
1028 1028
@@ -1031,7 +1031,7 @@ static void bfin_bus_post_reset(struct ata_port *ap, unsigned int devmask)
1031 lbal = read_atapi_register(base, ATA_REG_LBAL); 1031 lbal = read_atapi_register(base, ATA_REG_LBAL);
1032 if ((nsect == 1) && (lbal == 1)) 1032 if ((nsect == 1) && (lbal == 1))
1033 break; 1033 break;
1034 if (time_after(jiffies, timeout)) { 1034 if (time_after(jiffies, deadline)) {
1035 dev1 = 0; 1035 dev1 = 0;
1036 break; 1036 break;
1037 } 1037 }
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index fe7cc8ed4ea4..bc037ffce200 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -305,7 +305,7 @@ static unsigned int pdc_data_xfer_vlb(struct ata_device *dev,
305 iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); 305 iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2);
306 306
307 if (unlikely(slop)) { 307 if (unlikely(slop)) {
308 u32 pad; 308 __le32 pad;
309 if (rw == READ) { 309 if (rw == READ) {
310 pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr)); 310 pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
311 memcpy(buf + buflen - slop, &pad, slop); 311 memcpy(buf + buflen - slop, &pad, slop);
@@ -746,14 +746,12 @@ static unsigned int vlb32_data_xfer(struct ata_device *adev, unsigned char *buf,
746 ioread32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); 746 ioread32_rep(ap->ioaddr.data_addr, buf, buflen >> 2);
747 747
748 if (unlikely(slop)) { 748 if (unlikely(slop)) {
749 u32 pad; 749 __le32 pad;
750 if (rw == WRITE) { 750 if (rw == WRITE) {
751 memcpy(&pad, buf + buflen - slop, slop); 751 memcpy(&pad, buf + buflen - slop, slop);
752 pad = le32_to_cpu(pad); 752 iowrite32(le32_to_cpu(pad), ap->ioaddr.data_addr);
753 iowrite32(pad, ap->ioaddr.data_addr);
754 } else { 753 } else {
755 pad = ioread32(ap->ioaddr.data_addr); 754 pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
756 pad = cpu_to_le32(pad);
757 memcpy(buf + buflen - slop, &pad, slop); 755 memcpy(buf + buflen - slop, &pad, slop);
758 } 756 }
759 } 757 }
diff --git a/drivers/ata/pata_qdi.c b/drivers/ata/pata_qdi.c
index 97e5b090d7c2..63b7a1c165a5 100644
--- a/drivers/ata/pata_qdi.c
+++ b/drivers/ata/pata_qdi.c
@@ -137,7 +137,7 @@ static unsigned int qdi_data_xfer(struct ata_device *dev, unsigned char *buf,
137 iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); 137 iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2);
138 138
139 if (unlikely(slop)) { 139 if (unlikely(slop)) {
140 u32 pad; 140 __le32 pad;
141 if (rw == READ) { 141 if (rw == READ) {
142 pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr)); 142 pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
143 memcpy(buf + buflen - slop, &pad, slop); 143 memcpy(buf + buflen - slop, &pad, slop);
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index bbf5aa345e68..16673d168573 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -696,7 +696,7 @@ static void scc_bmdma_stop (struct ata_queued_cmd *qc)
696 696
697 if (reg & INTSTS_BMSINT) { 697 if (reg & INTSTS_BMSINT) {
698 unsigned int classes; 698 unsigned int classes;
699 unsigned long deadline = jiffies + ATA_TMOUT_BOOT; 699 unsigned long deadline = ata_deadline(jiffies, ATA_TMOUT_BOOT);
700 printk(KERN_WARNING "%s: Internal Bus Error\n", DRV_NAME); 700 printk(KERN_WARNING "%s: Internal Bus Error\n", DRV_NAME);
701 out_be32(bmid_base + SCC_DMA_INTST, INTSTS_BMSINT); 701 out_be32(bmid_base + SCC_DMA_INTST, INTSTS_BMSINT);
702 /* TBD: SW reset */ 702 /* TBD: SW reset */
diff --git a/drivers/ata/pata_winbond.c b/drivers/ata/pata_winbond.c
index 474528f8fe3d..a7606b044a61 100644
--- a/drivers/ata/pata_winbond.c
+++ b/drivers/ata/pata_winbond.c
@@ -105,7 +105,7 @@ static unsigned int winbond_data_xfer(struct ata_device *dev,
105 iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); 105 iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2);
106 106
107 if (unlikely(slop)) { 107 if (unlikely(slop)) {
108 u32 pad; 108 __le32 pad;
109 if (rw == READ) { 109 if (rw == READ) {
110 pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr)); 110 pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
111 memcpy(buf + buflen - slop, &pad, slop); 111 memcpy(buf + buflen - slop, &pad, slop);
diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index 16aa6839aa5a..fb13b82aacba 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c
@@ -253,21 +253,29 @@ static void k2_bmdma_start_mmio(struct ata_queued_cmd *qc)
253 /* start host DMA transaction */ 253 /* start host DMA transaction */
254 dmactl = readb(mmio + ATA_DMA_CMD); 254 dmactl = readb(mmio + ATA_DMA_CMD);
255 writeb(dmactl | ATA_DMA_START, mmio + ATA_DMA_CMD); 255 writeb(dmactl | ATA_DMA_START, mmio + ATA_DMA_CMD);
256 /* There is a race condition in certain SATA controllers that can 256 /* This works around possible data corruption.
257 be seen when the r/w command is given to the controller before the 257
258 host DMA is started. On a Read command, the controller would initiate 258 On certain SATA controllers that can be seen when the r/w
259 the command to the drive even before it sees the DMA start. When there 259 command is given to the controller before the host DMA is
260 are very fast drives connected to the controller, or when the data request 260 started.
261 hits in the drive cache, there is the possibility that the drive returns a part 261
262 or all of the requested data to the controller before the DMA start is issued. 262 On a Read command, the controller would initiate the
263 In this case, the controller would become confused as to what to do with the data. 263 command to the drive even before it sees the DMA
264 In the worst case when all the data is returned back to the controller, the 264 start. When there are very fast drives connected to the
265 controller could hang. In other cases it could return partial data returning 265 controller, or when the data request hits in the drive
266 in data corruption. This problem has been seen in PPC systems and can also appear 266 cache, there is the possibility that the drive returns a
267 on an system with very fast disks, where the SATA controller is sitting behind a 267 part or all of the requested data to the controller before
268 number of bridges, and hence there is significant latency between the r/w command 268 the DMA start is issued. In this case, the controller
269 and the start command. */ 269 would become confused as to what to do with the data. In
270 /* issue r/w command if the access is to ATA*/ 270 the worst case when all the data is returned back to the
271 controller, the controller could hang. In other cases it
 272 could return partial data resulting in data
273 corruption. This problem has been seen in PPC systems and
 274 can also appear on a system with very fast disks, where
275 the SATA controller is sitting behind a number of bridges,
276 and hence there is significant latency between the r/w
277 command and the start command. */
278 /* issue r/w command if the access is to ATA */
271 if (qc->tf.protocol == ATA_PROT_DMA) 279 if (qc->tf.protocol == ATA_PROT_DMA)
272 ap->ops->sff_exec_command(ap, &qc->tf); 280 ap->ops->sff_exec_command(ap, &qc->tf);
273} 281}
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 1efe162e16d7..3f6d9b0a6abe 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -93,47 +93,27 @@ static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
93#define define_siblings_show_func(name) \ 93#define define_siblings_show_func(name) \
94 define_siblings_show_map(name); define_siblings_show_list(name) 94 define_siblings_show_map(name); define_siblings_show_list(name)
95 95
96#ifdef topology_physical_package_id
97define_id_show_func(physical_package_id); 96define_id_show_func(physical_package_id);
98define_one_ro(physical_package_id); 97define_one_ro(physical_package_id);
99#define ref_physical_package_id_attr &attr_physical_package_id.attr,
100#else
101#define ref_physical_package_id_attr
102#endif
103 98
104#ifdef topology_core_id
105define_id_show_func(core_id); 99define_id_show_func(core_id);
106define_one_ro(core_id); 100define_one_ro(core_id);
107#define ref_core_id_attr &attr_core_id.attr,
108#else
109#define ref_core_id_attr
110#endif
111 101
112#ifdef topology_thread_siblings
113define_siblings_show_func(thread_siblings); 102define_siblings_show_func(thread_siblings);
114define_one_ro(thread_siblings); 103define_one_ro(thread_siblings);
115define_one_ro(thread_siblings_list); 104define_one_ro(thread_siblings_list);
116#define ref_thread_siblings_attr \
117 &attr_thread_siblings.attr, &attr_thread_siblings_list.attr,
118#else
119#define ref_thread_siblings_attr
120#endif
121 105
122#ifdef topology_core_siblings
123define_siblings_show_func(core_siblings); 106define_siblings_show_func(core_siblings);
124define_one_ro(core_siblings); 107define_one_ro(core_siblings);
125define_one_ro(core_siblings_list); 108define_one_ro(core_siblings_list);
126#define ref_core_siblings_attr \
127 &attr_core_siblings.attr, &attr_core_siblings_list.attr,
128#else
129#define ref_core_siblings_attr
130#endif
131 109
132static struct attribute *default_attrs[] = { 110static struct attribute *default_attrs[] = {
133 ref_physical_package_id_attr 111 &attr_physical_package_id.attr,
134 ref_core_id_attr 112 &attr_core_id.attr,
135 ref_thread_siblings_attr 113 &attr_thread_siblings.attr,
136 ref_core_siblings_attr 114 &attr_thread_siblings_list.attr,
115 &attr_core_siblings.attr,
116 &attr_core_siblings_list.attr,
137 NULL 117 NULL
138}; 118};
139 119
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 570f3b70dce7..5fdfa7c888ce 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -712,19 +712,17 @@ static void do_pd_request(struct request_queue * q)
712static int pd_special_command(struct pd_unit *disk, 712static int pd_special_command(struct pd_unit *disk,
713 enum action (*func)(struct pd_unit *disk)) 713 enum action (*func)(struct pd_unit *disk))
714{ 714{
715 DECLARE_COMPLETION_ONSTACK(wait); 715 struct request *rq;
716 struct request rq;
717 int err = 0; 716 int err = 0;
718 717
719 blk_rq_init(NULL, &rq); 718 rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT);
720 rq.rq_disk = disk->gd; 719
721 rq.end_io_data = &wait; 720 rq->cmd_type = REQ_TYPE_SPECIAL;
722 rq.end_io = blk_end_sync_rq; 721 rq->special = func;
723 blk_insert_request(disk->gd->queue, &rq, 0, func); 722
724 wait_for_completion(&wait); 723 err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
725 if (rq.errors) 724
726 err = -EIO; 725 blk_put_request(rq);
727 blk_put_request(&rq);
728 return err; 726 return err;
729} 727}
730 728
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 7b46faf22318..5ca1d80de182 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -215,3 +215,22 @@ pm_good:
215 * but we still need to load before device_initcall 215 * but we still need to load before device_initcall
216 */ 216 */
217fs_initcall(init_acpi_pm_clocksource); 217fs_initcall(init_acpi_pm_clocksource);
218
219/*
220 * Allow an override of the IOPort. Stupid BIOSes do not tell us about
221 * the PMTimer, but we might know where it is.
222 */
223static int __init parse_pmtmr(char *arg)
224{
225 unsigned long base;
226
227 if (strict_strtoul(arg, 16, &base))
228 return -EINVAL;
229
230 printk(KERN_INFO "PMTMR IOPort override: 0x%04x -> 0x%04lx\n",
231 (unsigned int)pmtmr_ioport, base);
232 pmtmr_ioport = base;
233
234 return 1;
235}
236__setup("pmtmr=", parse_pmtmr);
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 845081b44f63..0177012845c6 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -167,6 +167,11 @@ void drm_core_ioremap(struct drm_map *map, struct drm_device *dev)
167} 167}
168EXPORT_SYMBOL(drm_core_ioremap); 168EXPORT_SYMBOL(drm_core_ioremap);
169 169
170void drm_core_ioremap_wc(struct drm_map *map, struct drm_device *dev)
171{
172 map->handle = ioremap_wc(map->offset, map->size);
173}
174EXPORT_SYMBOL(drm_core_ioremap_wc);
170void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev) 175void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev)
171{ 176{
172 if (!map->handle || !map->size) 177 if (!map->handle || !map->size)
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index e53158f0ecb5..f0de81a5689d 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -1154,7 +1154,7 @@ static int radeon_do_init_cp(struct drm_device * dev, drm_radeon_init_t * init)
1154 dev_priv->gart_info.mapping.size = 1154 dev_priv->gart_info.mapping.size =
1155 dev_priv->gart_info.table_size; 1155 dev_priv->gart_info.table_size;
1156 1156
1157 drm_core_ioremap(&dev_priv->gart_info.mapping, dev); 1157 drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
1158 dev_priv->gart_info.addr = 1158 dev_priv->gart_info.addr =
1159 dev_priv->gart_info.mapping.handle; 1159 dev_priv->gart_info.mapping.handle;
1160 1160
diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index 35812823787b..eb8f72ca02f4 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c
@@ -320,7 +320,7 @@ static int try_address(struct i2c_adapter *i2c_adap,
320 unsigned char addr, int retries) 320 unsigned char addr, int retries)
321{ 321{
322 struct i2c_algo_bit_data *adap = i2c_adap->algo_data; 322 struct i2c_algo_bit_data *adap = i2c_adap->algo_data;
323 int i, ret = -1; 323 int i, ret = 0;
324 324
325 for (i = 0; i <= retries; i++) { 325 for (i = 0; i <= retries; i++) {
326 ret = i2c_outb(i2c_adap, addr); 326 ret = i2c_outb(i2c_adap, addr);
@@ -508,7 +508,7 @@ static int bit_doAddress(struct i2c_adapter *i2c_adap, struct i2c_msg *msg)
508 addr ^= 1; 508 addr ^= 1;
509 ret = try_address(i2c_adap, addr, retries); 509 ret = try_address(i2c_adap, addr, retries);
510 if ((ret != 1) && !nak_ok) 510 if ((ret != 1) && !nak_ok)
511 return -EREMOTEIO; 511 return -ENXIO;
512 } 512 }
513 513
514 return 0; 514 return 0;
diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c
index e954a20b97a6..d50b329a3c94 100644
--- a/drivers/i2c/algos/i2c-algo-pca.c
+++ b/drivers/i2c/algos/i2c-algo-pca.c
@@ -182,7 +182,7 @@ static int pca_xfer(struct i2c_adapter *i2c_adap,
182 } 182 }
183 if (state != 0xf8) { 183 if (state != 0xf8) {
184 dev_dbg(&i2c_adap->dev, "bus is not idle. status is %#04x\n", state); 184 dev_dbg(&i2c_adap->dev, "bus is not idle. status is %#04x\n", state);
185 return -EIO; 185 return -EAGAIN;
186 } 186 }
187 187
188 DEB1("{{{ XFER %d messages\n", num); 188 DEB1("{{{ XFER %d messages\n", num);
diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c
index 8907b0191677..1e328d19cd6d 100644
--- a/drivers/i2c/algos/i2c-algo-pcf.c
+++ b/drivers/i2c/algos/i2c-algo-pcf.c
@@ -78,6 +78,36 @@ static void i2c_stop(struct i2c_algo_pcf_data *adap)
78 set_pcf(adap, 1, I2C_PCF_STOP); 78 set_pcf(adap, 1, I2C_PCF_STOP);
79} 79}
80 80
81static void handle_lab(struct i2c_algo_pcf_data *adap, const int *status)
82{
83 DEB2(printk(KERN_INFO
84 "i2c-algo-pcf.o: lost arbitration (CSR 0x%02x)\n",
85 *status));
86
87 /* Cleanup from LAB -- reset and enable ESO.
88 * This resets the PCF8584; since we've lost the bus, no
89 * further attempts should be made by callers to clean up
90 * (no i2c_stop() etc.)
91 */
92 set_pcf(adap, 1, I2C_PCF_PIN);
93 set_pcf(adap, 1, I2C_PCF_ESO);
94
95 /* We pause for a time period sufficient for any running
96 * I2C transaction to complete -- the arbitration logic won't
97 * work properly until the next START is seen.
98 * It is assumed the bus driver or client has set a proper value.
99 *
100 * REVISIT: should probably use msleep instead of mdelay if we
101 * know we can sleep.
102 */
103 if (adap->lab_mdelay)
104 mdelay(adap->lab_mdelay);
105
106 DEB2(printk(KERN_INFO
107 "i2c-algo-pcf.o: reset LAB condition (CSR 0x%02x)\n",
108 get_pcf(adap, 1)));
109}
110
81static int wait_for_bb(struct i2c_algo_pcf_data *adap) { 111static int wait_for_bb(struct i2c_algo_pcf_data *adap) {
82 112
83 int timeout = DEF_TIMEOUT; 113 int timeout = DEF_TIMEOUT;
@@ -109,23 +139,7 @@ static int wait_for_pin(struct i2c_algo_pcf_data *adap, int *status) {
109 *status = get_pcf(adap, 1); 139 *status = get_pcf(adap, 1);
110 } 140 }
111 if (*status & I2C_PCF_LAB) { 141 if (*status & I2C_PCF_LAB) {
112 DEB2(printk(KERN_INFO 142 handle_lab(adap, status);
113 "i2c-algo-pcf.o: lost arbitration (CSR 0x%02x)\n",
114 *status));
115 /* Cleanup from LAB-- reset and enable ESO.
116 * This resets the PCF8584; since we've lost the bus, no
117 * further attempts should be made by callers to clean up
118 * (no i2c_stop() etc.)
119 */
120 set_pcf(adap, 1, I2C_PCF_PIN);
121 set_pcf(adap, 1, I2C_PCF_ESO);
122 /* TODO: we should pause for a time period sufficient for any
123 * running I2C transaction to complete-- the arbitration
124 * logic won't work properly until the next START is seen.
125 */
126 DEB2(printk(KERN_INFO
127 "i2c-algo-pcf.o: reset LAB condition (CSR 0x%02x)\n",
128 get_pcf(adap,1)));
129 return(-EINTR); 143 return(-EINTR);
130 } 144 }
131#endif 145#endif
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 48438cc5d0ca..6ee997b2817c 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -4,6 +4,9 @@
4 4
5menu "I2C Hardware Bus support" 5menu "I2C Hardware Bus support"
6 6
7comment "PC SMBus host controller drivers"
8 depends on PCI
9
7config I2C_ALI1535 10config I2C_ALI1535
8 tristate "ALI 1535" 11 tristate "ALI 1535"
9 depends on PCI 12 depends on PCI
@@ -73,6 +76,186 @@ config I2C_AMD8111
73 This driver can also be built as a module. If so, the module 76 This driver can also be built as a module. If so, the module
74 will be called i2c-amd8111. 77 will be called i2c-amd8111.
75 78
79config I2C_I801
80 tristate "Intel 82801 (ICH)"
81 depends on PCI
82 help
83 If you say yes to this option, support will be included for the Intel
84 801 family of mainboard I2C interfaces. Specifically, the following
85 versions of the chipset are supported:
86 82801AA
87 82801AB
88 82801BA
89 82801CA/CAM
90 82801DB
91 82801EB/ER (ICH5/ICH5R)
92 6300ESB
93 ICH6
94 ICH7
95 ESB2
96 ICH8
97 ICH9
98 Tolapai
99 ICH10
100
101 This driver can also be built as a module. If so, the module
102 will be called i2c-i801.
103
104config I2C_ISCH
105 tristate "Intel SCH SMBus 1.0"
106 depends on PCI
107 help
108 Say Y here if you want to use SMBus controller on the Intel SCH
109 based systems.
110
111 This driver can also be built as a module. If so, the module
112 will be called i2c-isch.
113
114config I2C_PIIX4
115 tristate "Intel PIIX4 and compatible (ATI/Serverworks/Broadcom/SMSC)"
116 depends on PCI
117 help
118 If you say yes to this option, support will be included for the Intel
119 PIIX4 family of mainboard I2C interfaces. Specifically, the following
120 versions of the chipset are supported (note that Serverworks is part
121 of Broadcom):
122 Intel PIIX4
123 Intel 440MX
124 ATI IXP200
125 ATI IXP300
126 ATI IXP400
127 ATI SB600
128 ATI SB700
129 ATI SB800
130 Serverworks OSB4
131 Serverworks CSB5
132 Serverworks CSB6
133 Serverworks HT-1000
134 SMSC Victory66
135
136 This driver can also be built as a module. If so, the module
137 will be called i2c-piix4.
138
139config I2C_NFORCE2
140 tristate "Nvidia nForce2, nForce3 and nForce4"
141 depends on PCI
142 help
143 If you say yes to this option, support will be included for the Nvidia
144 nForce2, nForce3 and nForce4 families of mainboard I2C interfaces.
145
146 This driver can also be built as a module. If so, the module
147 will be called i2c-nforce2.
148
149config I2C_NFORCE2_S4985
150 tristate "SMBus multiplexing on the Tyan S4985"
151 depends on I2C_NFORCE2 && EXPERIMENTAL
152 help
153 Enabling this option will add specific SMBus support for the Tyan
154 S4985 motherboard. On this 4-CPU board, the SMBus is multiplexed
155 over 4 different channels, where the various memory module EEPROMs
156 live. Saying yes here will give you access to these in addition
157 to the trunk.
158
159 This driver can also be built as a module. If so, the module
160 will be called i2c-nforce2-s4985.
161
162config I2C_SIS5595
163 tristate "SiS 5595"
164 depends on PCI
165 help
166 If you say yes to this option, support will be included for the
167 SiS5595 SMBus (a subset of I2C) interface.
168
169 This driver can also be built as a module. If so, the module
170 will be called i2c-sis5595.
171
172config I2C_SIS630
173 tristate "SiS 630/730"
174 depends on PCI
175 help
176 If you say yes to this option, support will be included for the
177 SiS630 and SiS730 SMBus (a subset of I2C) interface.
178
179 This driver can also be built as a module. If so, the module
180 will be called i2c-sis630.
181
182config I2C_SIS96X
183 tristate "SiS 96x"
184 depends on PCI
185 help
186 If you say yes to this option, support will be included for the SiS
187 96x SMBus (a subset of I2C) interfaces. Specifically, the following
188 chipsets are supported:
189 645/961
190 645DX/961
191 645DX/962
192 648/961
193 650/961
194 735
195 745
196
197 This driver can also be built as a module. If so, the module
198 will be called i2c-sis96x.
199
200config I2C_VIA
201 tristate "VIA VT82C586B"
202 depends on PCI && EXPERIMENTAL
203 select I2C_ALGOBIT
204 help
205 If you say yes to this option, support will be included for the VIA
206 82C586B I2C interface
207
208 This driver can also be built as a module. If so, the module
209 will be called i2c-via.
210
211config I2C_VIAPRO
212 tristate "VIA VT82C596/82C686/82xx and CX700"
213 depends on PCI
214 help
215 If you say yes to this option, support will be included for the VIA
216 VT82C596 and later SMBus interface. Specifically, the following
217 chipsets are supported:
218 VT82C596A/B
219 VT82C686A/B
220 VT8231
221 VT8233/A
222 VT8235
223 VT8237R/A/S
224 VT8251
225 CX700
226
227 This driver can also be built as a module. If so, the module
228 will be called i2c-viapro.
229
230comment "Mac SMBus host controller drivers"
231 depends on PPC_CHRP || PPC_PMAC
232
233config I2C_HYDRA
234 tristate "CHRP Apple Hydra Mac I/O I2C interface"
235 depends on PCI && PPC_CHRP && EXPERIMENTAL
236 select I2C_ALGOBIT
237 help
238 This supports the use of the I2C interface in the Apple Hydra Mac
239 I/O chip on some CHRP machines (e.g. the LongTrail). Say Y if you
240 have such a machine.
241
242 This support is also available as a module. If so, the module
243 will be called i2c-hydra.
244
245config I2C_POWERMAC
246 tristate "Powermac I2C interface"
247 depends on PPC_PMAC
248 default y
249 help
250 This exposes the various PowerMac i2c interfaces to the linux i2c
251 layer and to userland. It is used by various drivers on the PowerMac
252 platform, and should generally be enabled.
253
254 This support is also available as a module. If so, the module
255 will be called i2c-powermac.
256
257comment "I2C system bus drivers (mostly embedded / system-on-chip)"
258
76config I2C_AT91 259config I2C_AT91
77 tristate "Atmel AT91 I2C Two-Wire interface (TWI)" 260 tristate "Atmel AT91 I2C Two-Wire interface (TWI)"
78 depends on ARCH_AT91 && EXPERIMENTAL && BROKEN 261 depends on ARCH_AT91 && EXPERIMENTAL && BROKEN
@@ -101,10 +284,9 @@ config I2C_AU1550
101config I2C_BLACKFIN_TWI 284config I2C_BLACKFIN_TWI
102 tristate "Blackfin TWI I2C support" 285 tristate "Blackfin TWI I2C support"
103 depends on BLACKFIN 286 depends on BLACKFIN
287 depends on !BF561 && !BF531 && !BF532 && !BF533
104 help 288 help
105 This is the TWI I2C device driver for Blackfin BF522, BF525, 289 This is the I2C bus driver for Blackfin on-chip TWI interface.
106 BF527, BF534, BF536, BF537 and BF54x. For other Blackfin processors,
107 please don't use this driver.
108 290
109 This driver can also be built as a module. If so, the module 291 This driver can also be built as a module. If so, the module
110 will be called i2c-bfin-twi. 292 will be called i2c-bfin-twi.
@@ -117,6 +299,16 @@ config I2C_BLACKFIN_TWI_CLK_KHZ
117 help 299 help
118 The unit of the TWI clock is kHz. 300 The unit of the TWI clock is kHz.
119 301
302config I2C_CPM
303 tristate "Freescale CPM1 or CPM2 (MPC8xx/826x)"
304 depends on (CPM1 || CPM2) && OF_I2C
305 help
306 This supports the use of the I2C interface on Freescale
307 processors with CPM1 or CPM2.
308
309 This driver can also be built as a module. If so, the module
310 will be called i2c-cpm.
311
120config I2C_DAVINCI 312config I2C_DAVINCI
121 tristate "DaVinci I2C driver" 313 tristate "DaVinci I2C driver"
122 depends on ARCH_DAVINCI 314 depends on ARCH_DAVINCI
@@ -130,17 +322,6 @@ config I2C_DAVINCI
130 devices such as DaVinci NIC. 322 devices such as DaVinci NIC.
131 For details please see http://www.ti.com/davinci 323 For details please see http://www.ti.com/davinci
132 324
133config I2C_ELEKTOR
134 tristate "Elektor ISA card"
135 depends on ISA && BROKEN_ON_SMP
136 select I2C_ALGOPCF
137 help
138 This supports the PCF8584 ISA bus I2C adapter. Say Y if you own
139 such an adapter.
140
141 This support is also available as a module. If so, the module
142 will be called i2c-elektor.
143
144config I2C_GPIO 325config I2C_GPIO
145 tristate "GPIO-based bitbanging I2C" 326 tristate "GPIO-based bitbanging I2C"
146 depends on GENERIC_GPIO 327 depends on GENERIC_GPIO
@@ -149,104 +330,6 @@ config I2C_GPIO
149 This is a very simple bitbanging I2C driver utilizing the 330 This is a very simple bitbanging I2C driver utilizing the
150 arch-neutral GPIO API to control the SCL and SDA lines. 331 arch-neutral GPIO API to control the SCL and SDA lines.
151 332
152config I2C_HYDRA
153 tristate "CHRP Apple Hydra Mac I/O I2C interface"
154 depends on PCI && PPC_CHRP && EXPERIMENTAL
155 select I2C_ALGOBIT
156 help
157 This supports the use of the I2C interface in the Apple Hydra Mac
158 I/O chip on some CHRP machines (e.g. the LongTrail). Say Y if you
159 have such a machine.
160
161 This support is also available as a module. If so, the module
162 will be called i2c-hydra.
163
164config I2C_I801
165 tristate "Intel 82801 (ICH)"
166 depends on PCI
167 help
168 If you say yes to this option, support will be included for the Intel
169 801 family of mainboard I2C interfaces. Specifically, the following
170 versions of the chipset are supported:
171 82801AA
172 82801AB
173 82801BA
174 82801CA/CAM
175 82801DB
176 82801EB/ER (ICH5/ICH5R)
177 6300ESB
178 ICH6
179 ICH7
180 ESB2
181 ICH8
182 ICH9
183 Tolapai
184 ICH10
185
186 This driver can also be built as a module. If so, the module
187 will be called i2c-i801.
188
189config I2C_I810
190 tristate "Intel 810/815 (DEPRECATED)"
191 default n
192 depends on PCI
193 select I2C_ALGOBIT
194 help
195 If you say yes to this option, support will be included for the Intel
196 810/815 family of mainboard I2C interfaces. Specifically, the
197 following versions of the chipset are supported:
198 i810AA
199 i810AB
200 i810E
201 i815
202 i845G
203
204 This driver is deprecated in favor of the i810fb and intelfb drivers.
205
206 This driver can also be built as a module. If so, the module
207 will be called i2c-i810.
208
209config I2C_PXA
210 tristate "Intel PXA2XX I2C adapter (EXPERIMENTAL)"
211 depends on EXPERIMENTAL && ARCH_PXA
212 help
213 If you have devices in the PXA I2C bus, say yes to this option.
214 This driver can also be built as a module. If so, the module
215 will be called i2c-pxa.
216
217config I2C_PXA_SLAVE
218 bool "Intel PXA2XX I2C Slave comms support"
219 depends on I2C_PXA
220 help
221 Support I2C slave mode communications on the PXA I2C bus. This
222 is necessary for systems where the PXA may be a target on the
223 I2C bus.
224
225config I2C_PIIX4
226 tristate "Intel PIIX4 and compatible (ATI/Serverworks/Broadcom/SMSC)"
227 depends on PCI
228 help
229 If you say yes to this option, support will be included for the Intel
230 PIIX4 family of mainboard I2C interfaces. Specifically, the following
231 versions of the chipset are supported (note that Serverworks is part
232 of Broadcom):
233 Intel PIIX4
234 Intel 440MX
235 ATI IXP200
236 ATI IXP300
237 ATI IXP400
238 ATI SB600
239 ATI SB700
240 ATI SB800
241 Serverworks OSB4
242 Serverworks CSB5
243 Serverworks CSB6
244 Serverworks HT-1000
245 SMSC Victory66
246
247 This driver can also be built as a module. If so, the module
248 will be called i2c-piix4.
249
250config I2C_IBM_IIC 333config I2C_IBM_IIC
251 tristate "IBM PPC 4xx on-chip I2C interface" 334 tristate "IBM PPC 4xx on-chip I2C interface"
252 depends on 4xx 335 depends on 4xx
@@ -281,18 +364,6 @@ config I2C_IXP2000
281 This driver is deprecated and will be dropped soon. Use i2c-gpio 364 This driver is deprecated and will be dropped soon. Use i2c-gpio
282 instead. 365 instead.
283 366
284config I2C_POWERMAC
285 tristate "Powermac I2C interface"
286 depends on PPC_PMAC
287 default y
288 help
289 This exposes the various PowerMac i2c interfaces to the linux i2c
290 layer and to userland. It is used by various drivers on the PowerMac
291 platform, and should generally be enabled.
292
293 This support is also available as a module. If so, the module
294 will be called i2c-powermac.
295
296config I2C_MPC 367config I2C_MPC
297 tristate "MPC107/824x/85xx/52xx/86xx" 368 tristate "MPC107/824x/85xx/52xx/86xx"
298 depends on PPC32 369 depends on PPC32
@@ -305,15 +376,15 @@ config I2C_MPC
305 This driver can also be built as a module. If so, the module 376 This driver can also be built as a module. If so, the module
306 will be called i2c-mpc. 377 will be called i2c-mpc.
307 378
308config I2C_NFORCE2 379config I2C_MV64XXX
309 tristate "Nvidia nForce2, nForce3 and nForce4" 380 tristate "Marvell mv64xxx I2C Controller"
310 depends on PCI 381 depends on (MV64X60 || PLAT_ORION) && EXPERIMENTAL
311 help 382 help
312 If you say yes to this option, support will be included for the Nvidia 383 If you say yes to this option, support will be included for the
313 nForce2, nForce3 and nForce4 families of mainboard I2C interfaces. 384 built-in I2C interface on the Marvell 64xxx line of host bridges.
314 385
315 This driver can also be built as a module. If so, the module 386 This driver can also be built as a module. If so, the module
316 will be called i2c-nforce2. 387 will be called i2c-mv64xxx.
317 388
318config I2C_OCORES 389config I2C_OCORES
319 tristate "OpenCores I2C Controller" 390 tristate "OpenCores I2C Controller"
@@ -336,76 +407,37 @@ config I2C_OMAP
336 Like OMAP1510/1610/1710/5912 and OMAP242x. 407 Like OMAP1510/1610/1710/5912 and OMAP242x.
337 For details see http://www.ti.com/omap. 408 For details see http://www.ti.com/omap.
338 409
339config I2C_PARPORT
340 tristate "Parallel port adapter"
341 depends on PARPORT
342 select I2C_ALGOBIT
343 help
344 This supports parallel port I2C adapters such as the ones made by
345 Philips or Velleman, Analog Devices evaluation boards, and more.
346 Basically any adapter using the parallel port as an I2C bus with
347 no extra chipset is supported by this driver, or could be.
348
349 This driver is a replacement for (and was inspired by) an older
350 driver named i2c-philips-par. The new driver supports more devices,
351 and makes it easier to add support for new devices.
352
353 An adapter type parameter is now mandatory. Please read the file
354 Documentation/i2c/busses/i2c-parport for details.
355
356 Another driver exists, named i2c-parport-light, which doesn't depend
357 on the parport driver. This is meant for embedded systems. Don't say
358 Y here if you intend to say Y or M there.
359
360 This support is also available as a module. If so, the module
361 will be called i2c-parport.
362
363config I2C_PARPORT_LIGHT
364 tristate "Parallel port adapter (light)"
365 select I2C_ALGOBIT
366 help
367 This supports parallel port I2C adapters such as the ones made by
368 Philips or Velleman, Analog Devices evaluation boards, and more.
369 Basically any adapter using the parallel port as an I2C bus with
370 no extra chipset is supported by this driver, or could be.
371
372 This driver is a light version of i2c-parport. It doesn't depend
373 on the parport driver, and uses direct I/O access instead. This
374 might be preferred on embedded systems where wasting memory for
375 the clean but heavy parport handling is not an option. The
376 drawback is a reduced portability and the impossibility to
377 daisy-chain other parallel port devices.
378
379 Don't say Y here if you said Y or M to i2c-parport. Saying M to
380 both is possible but both modules should not be loaded at the same
381 time.
382
383 This support is also available as a module. If so, the module
384 will be called i2c-parport-light.
385
386config I2C_PASEMI 410config I2C_PASEMI
387 tristate "PA Semi SMBus interface" 411 tristate "PA Semi SMBus interface"
388 depends on PPC_PASEMI && PCI 412 depends on PPC_PASEMI && PCI
389 help 413 help
390 Supports the PA Semi PWRficient on-chip SMBus interfaces. 414 Supports the PA Semi PWRficient on-chip SMBus interfaces.
391 415
392config I2C_PROSAVAGE 416config I2C_PNX
393 tristate "S3/VIA (Pro)Savage (DEPRECATED)" 417 tristate "I2C bus support for Philips PNX targets"
394 default n 418 depends on ARCH_PNX4008
395 depends on PCI
396 select I2C_ALGOBIT
397 help 419 help
398 If you say yes to this option, support will be included for the 420 This driver supports the Philips IP3204 I2C IP block master and/or
399 I2C bus and DDC bus of the S3VIA embedded Savage4 and ProSavage8 421 slave controller
400 graphics processors.
401 chipsets supported:
402 S3/VIA KM266/VT8375 aka ProSavage8
403 S3/VIA KM133/VT8365 aka Savage4
404 422
405 This driver is deprecated in favor of the savagefb driver. 423 This driver can also be built as a module. If so, the module
424 will be called i2c-pnx.
406 425
407 This support is also available as a module. If so, the module 426config I2C_PXA
408 will be called i2c-prosavage. 427 tristate "Intel PXA2XX I2C adapter (EXPERIMENTAL)"
428 depends on EXPERIMENTAL && ARCH_PXA
429 help
430 If you have devices in the PXA I2C bus, say yes to this option.
431 This driver can also be built as a module. If so, the module
432 will be called i2c-pxa.
433
434config I2C_PXA_SLAVE
435 bool "Intel PXA2XX I2C Slave comms support"
436 depends on I2C_PXA
437 help
438 Support I2C slave mode communications on the PXA I2C bus. This
439 is necessary for systems where the PXA may be a target on the
440 I2C bus.
409 441
410config I2C_S3C2410 442config I2C_S3C2410
411 tristate "S3C2410 I2C Driver" 443 tristate "S3C2410 I2C Driver"
@@ -414,25 +446,24 @@ config I2C_S3C2410
414 Say Y here to include support for I2C controller in the 446 Say Y here to include support for I2C controller in the
415 Samsung S3C2410 based System-on-Chip devices. 447 Samsung S3C2410 based System-on-Chip devices.
416 448
417config I2C_SAVAGE4 449config I2C_SH7760
418 tristate "S3 Savage 4 (DEPRECATED)" 450 tristate "Renesas SH7760 I2C Controller"
419 default n 451 depends on CPU_SUBTYPE_SH7760
420 depends on PCI
421 select I2C_ALGOBIT
422 help 452 help
423 If you say yes to this option, support will be included for the 453 This driver supports the 2 I2C interfaces on the Renesas SH7760.
424 S3 Savage 4 I2C interface.
425
426 This driver is deprecated in favor of the savagefb driver.
427 454
428 This driver can also be built as a module. If so, the module 455 This driver can also be built as a module. If so, the module
429 will be called i2c-savage4. 456 will be called i2c-sh7760.
430 457
431config I2C_SIBYTE 458config I2C_SH_MOBILE
432 tristate "SiByte SMBus interface" 459 tristate "SuperH Mobile I2C Controller"
433 depends on SIBYTE_SB1xxx_SOC 460 depends on SUPERH
434 help 461 help
435 Supports the SiByte SOC on-chip I2C interfaces (2 channels). 462 If you say yes to this option, support will be included for the
463 built-in I2C interface on the Renesas SH-Mobile processor.
464
465 This driver can also be built as a module. If so, the module
466 will be called i2c-sh_mobile.
436 467
437config I2C_SIMTEC 468config I2C_SIMTEC
438 tristate "Simtec Generic I2C interface" 469 tristate "Simtec Generic I2C interface"
@@ -446,86 +477,65 @@ config I2C_SIMTEC
446 This driver can also be built as a module. If so, the module 477 This driver can also be built as a module. If so, the module
447 will be called i2c-simtec. 478 will be called i2c-simtec.
448 479
449config SCx200_I2C 480config I2C_VERSATILE
450 tristate "NatSemi SCx200 I2C using GPIO pins (DEPRECATED)" 481 tristate "ARM Versatile/Realview I2C bus support"
451 depends on SCx200_GPIO 482 depends on ARCH_VERSATILE || ARCH_REALVIEW
452 select I2C_ALGOBIT 483 select I2C_ALGOBIT
453 help 484 help
454 Enable the use of two GPIO pins of a SCx200 processor as an I2C bus. 485 Say yes if you want to support the I2C serial bus on ARMs Versatile
455 486 range of platforms.
456 If you don't know what to do here, say N.
457 487
458 This support is also available as a module. If so, the module 488 This driver can also be built as a module. If so, the module
459 will be called scx200_i2c. 489 will be called i2c-versatile.
460 490
461 This driver is deprecated and will be dropped soon. Use i2c-gpio 491comment "External I2C/SMBus adapter drivers"
462 (or scx200_acb) instead.
463 492
464config SCx200_I2C_SCL 493config I2C_PARPORT
465 int "GPIO pin used for SCL" 494 tristate "Parallel port adapter"
466 depends on SCx200_I2C 495 depends on PARPORT
467 default "12" 496 select I2C_ALGOBIT
468 help 497 help
469 Enter the GPIO pin number used for the SCL signal. This value can 498 This supports parallel port I2C adapters such as the ones made by
470 also be specified with a module parameter. 499 Philips or Velleman, Analog Devices evaluation boards, and more.
500 Basically any adapter using the parallel port as an I2C bus with
501 no extra chipset is supported by this driver, or could be.
471 502
472config SCx200_I2C_SDA 503 This driver is a replacement for (and was inspired by) an older
473 int "GPIO pin used for SDA" 504 driver named i2c-philips-par. The new driver supports more devices,
474 depends on SCx200_I2C 505 and makes it easier to add support for new devices.
475 default "13"
476 help
477 Enter the GPIO pin number used for the SSA signal. This value can
478 also be specified with a module parameter.
479 506
480config SCx200_ACB 507 An adapter type parameter is now mandatory. Please read the file
481 tristate "Geode ACCESS.bus support" 508 Documentation/i2c/busses/i2c-parport for details.
482 depends on X86_32 && PCI
483 help
484 Enable the use of the ACCESS.bus controllers on the Geode SCx200 and
485 SC1100 processors and the CS5535 and CS5536 Geode companion devices.
486 509
487 If you don't know what to do here, say N. 510 Another driver exists, named i2c-parport-light, which doesn't depend
511 on the parport driver. This is meant for embedded systems. Don't say
512 Y here if you intend to say Y or M there.
488 513
489 This support is also available as a module. If so, the module 514 This support is also available as a module. If so, the module
490 will be called scx200_acb. 515 will be called i2c-parport.
491
492config I2C_SIS5595
493 tristate "SiS 5595"
494 depends on PCI
495 help
496 If you say yes to this option, support will be included for the
497 SiS5595 SMBus (a subset of I2C) interface.
498
499 This driver can also be built as a module. If so, the module
500 will be called i2c-sis5595.
501 516
502config I2C_SIS630 517config I2C_PARPORT_LIGHT
503 tristate "SiS 630/730" 518 tristate "Parallel port adapter (light)"
504 depends on PCI 519 select I2C_ALGOBIT
505 help 520 help
506 If you say yes to this option, support will be included for the 521 This supports parallel port I2C adapters such as the ones made by
507 SiS630 and SiS730 SMBus (a subset of I2C) interface. 522 Philips or Velleman, Analog Devices evaluation boards, and more.
523 Basically any adapter using the parallel port as an I2C bus with
524 no extra chipset is supported by this driver, or could be.
508 525
509 This driver can also be built as a module. If so, the module 526 This driver is a light version of i2c-parport. It doesn't depend
510 will be called i2c-sis630. 527 on the parport driver, and uses direct I/O access instead. This
528 might be preferred on embedded systems where wasting memory for
529 the clean but heavy parport handling is not an option. The
530 drawback is a reduced portability and the impossibility to
531 daisy-chain other parallel port devices.
511 532
512config I2C_SIS96X 533 Don't say Y here if you said Y or M to i2c-parport. Saying M to
513 tristate "SiS 96x" 534 both is possible but both modules should not be loaded at the same
514 depends on PCI 535 time.
515 help
516 If you say yes to this option, support will be included for the SiS
517 96x SMBus (a subset of I2C) interfaces. Specifically, the following
518 chipsets are supported:
519 645/961
520 645DX/961
521 645DX/962
522 648/961
523 650/961
524 735
525 745
526 536
527 This driver can also be built as a module. If so, the module 537 This support is also available as a module. If so, the module
528 will be called i2c-sis96x. 538 will be called i2c-parport-light.
529 539
530config I2C_TAOS_EVM 540config I2C_TAOS_EVM
531 tristate "TAOS evaluation module" 541 tristate "TAOS evaluation module"
@@ -543,21 +553,8 @@ config I2C_TAOS_EVM
543 This support is also available as a module. If so, the module 553 This support is also available as a module. If so, the module
544 will be called i2c-taos-evm. 554 will be called i2c-taos-evm.
545 555
546config I2C_STUB
547 tristate "I2C/SMBus Test Stub"
548 depends on EXPERIMENTAL && m
549 default 'n'
550 help
551 This module may be useful to developers of SMBus client drivers,
552 especially for certain kinds of sensor chips.
553
554 If you do build this module, be sure to read the notes and warnings
555 in <file:Documentation/i2c/i2c-stub>.
556
557 If you don't know what to do here, definitely say N.
558
559config I2C_TINY_USB 556config I2C_TINY_USB
560 tristate "I2C-Tiny-USB" 557 tristate "Tiny-USB adapter"
561 depends on USB 558 depends on USB
562 help 559 help
563 If you say yes to this option, support will be included for the 560 If you say yes to this option, support will be included for the
@@ -567,16 +564,21 @@ config I2C_TINY_USB
567 This driver can also be built as a module. If so, the module 564 This driver can also be built as a module. If so, the module
568 will be called i2c-tiny-usb. 565 will be called i2c-tiny-usb.
569 566
570config I2C_VERSATILE 567comment "Graphics adapter I2C/DDC channel drivers"
571 tristate "ARM Versatile/Realview I2C bus support" 568 depends on PCI
572 depends on ARCH_VERSATILE || ARCH_REALVIEW 569
570config I2C_VOODOO3
571 tristate "Voodoo 3"
572 depends on PCI
573 select I2C_ALGOBIT 573 select I2C_ALGOBIT
574 help 574 help
575 Say yes if you want to support the I2C serial bus on ARMs Versatile 575 If you say yes to this option, support will be included for the
576 range of platforms. 576 Voodoo 3 I2C interface.
577 577
578 This driver can also be built as a module. If so, the module 578 This driver can also be built as a module. If so, the module
579 will be called i2c-versatile. 579 will be called i2c-voodoo3.
580
581comment "Other I2C/SMBus bus drivers"
580 582
581config I2C_ACORN 583config I2C_ACORN
582 tristate "Acorn IOC/IOMD I2C bus support" 584 tristate "Acorn IOC/IOMD I2C bus support"
@@ -588,46 +590,16 @@ config I2C_ACORN
588 590
589 If you don't know, say Y. 591 If you don't know, say Y.
590 592
591config I2C_VIA 593config I2C_ELEKTOR
592 tristate "VIA 82C586B" 594 tristate "Elektor ISA card"
593 depends on PCI && EXPERIMENTAL 595 depends on ISA && BROKEN_ON_SMP
594 select I2C_ALGOBIT 596 select I2C_ALGOPCF
595 help
596 If you say yes to this option, support will be included for the VIA
597 82C586B I2C interface
598
599 This driver can also be built as a module. If so, the module
600 will be called i2c-via.
601
602config I2C_VIAPRO
603 tristate "VIA VT82C596/82C686/82xx and CX700"
604 depends on PCI
605 help
606 If you say yes to this option, support will be included for the VIA
607 VT82C596 and later SMBus interface. Specifically, the following
608 chipsets are supported:
609 VT82C596A/B
610 VT82C686A/B
611 VT8231
612 VT8233/A
613 VT8235
614 VT8237R/A/S
615 VT8251
616 CX700
617
618 This driver can also be built as a module. If so, the module
619 will be called i2c-viapro.
620
621config I2C_VOODOO3
622 tristate "Voodoo 3"
623 depends on PCI
624 select I2C_ALGOBIT
625 help 597 help
626 If you say yes to this option, support will be included for the 598 This supports the PCF8584 ISA bus I2C adapter. Say Y if you own
627 Voodoo 3 I2C interface. 599 such an adapter.
628 600
629 This driver can also be built as a module. If so, the module 601 This support is also available as a module. If so, the module
630 will be called i2c-voodoo3. 602 will be called i2c-elektor.
631 603
632config I2C_PCA_ISA 604config I2C_PCA_ISA
633 tristate "PCA9564 on an ISA bus" 605 tristate "PCA9564 on an ISA bus"
@@ -657,26 +629,6 @@ config I2C_PCA_PLATFORM
657 This driver can also be built as a module. If so, the module 629 This driver can also be built as a module. If so, the module
658 will be called i2c-pca-platform. 630 will be called i2c-pca-platform.
659 631
660config I2C_MV64XXX
661 tristate "Marvell mv64xxx I2C Controller"
662 depends on (MV64X60 || PLAT_ORION) && EXPERIMENTAL
663 help
664 If you say yes to this option, support will be included for the
665 built-in I2C interface on the Marvell 64xxx line of host bridges.
666
667 This driver can also be built as a module. If so, the module
668 will be called i2c-mv64xxx.
669
670config I2C_PNX
671 tristate "I2C bus support for Philips PNX targets"
672 depends on ARCH_PNX4008
673 help
674 This driver supports the Philips IP3204 I2C IP block master and/or
675 slave controller
676
677 This driver can also be built as a module. If so, the module
678 will be called i2c-pnx.
679
680config I2C_PMCMSP 632config I2C_PMCMSP
681 tristate "PMC MSP I2C TWI Controller" 633 tristate "PMC MSP I2C TWI Controller"
682 depends on PMC_MSP 634 depends on PMC_MSP
@@ -686,23 +638,66 @@ config I2C_PMCMSP
686 This driver can also be built as module. If so, the module 638 This driver can also be built as module. If so, the module
687 will be called i2c-pmcmsp. 639 will be called i2c-pmcmsp.
688 640
689config I2C_SH7760 641config I2C_SIBYTE
690 tristate "Renesas SH7760 I2C Controller" 642 tristate "SiByte SMBus interface"
691 depends on CPU_SUBTYPE_SH7760 643 depends on SIBYTE_SB1xxx_SOC
692 help 644 help
693 This driver supports the 2 I2C interfaces on the Renesas SH7760. 645 Supports the SiByte SOC on-chip I2C interfaces (2 channels).
694 646
695 This driver can also be built as a module. If so, the module 647config I2C_STUB
696 will be called i2c-sh7760. 648 tristate "I2C/SMBus Test Stub"
649 depends on EXPERIMENTAL && m
650 default 'n'
651 help
652 This module may be useful to developers of SMBus client drivers,
653 especially for certain kinds of sensor chips.
697 654
698config I2C_SH_MOBILE 655 If you do build this module, be sure to read the notes and warnings
699 tristate "SuperH Mobile I2C Controller" 656 in <file:Documentation/i2c/i2c-stub>.
700 depends on SUPERH 657
658 If you don't know what to do here, definitely say N.
659
660config SCx200_I2C
661 tristate "NatSemi SCx200 I2C using GPIO pins (DEPRECATED)"
662 depends on SCx200_GPIO
663 select I2C_ALGOBIT
701 help 664 help
702 If you say yes to this option, support will be included for the 665 Enable the use of two GPIO pins of a SCx200 processor as an I2C bus.
703 built-in I2C interface on the Renesas SH-Mobile processor.
704 666
705 This driver can also be built as a module. If so, the module 667 If you don't know what to do here, say N.
706 will be called i2c-sh_mobile. 668
669 This support is also available as a module. If so, the module
670 will be called scx200_i2c.
671
672 This driver is deprecated and will be dropped soon. Use i2c-gpio
673 (or scx200_acb) instead.
674
675config SCx200_I2C_SCL
676 int "GPIO pin used for SCL"
677 depends on SCx200_I2C
678 default "12"
679 help
680 Enter the GPIO pin number used for the SCL signal. This value can
681 also be specified with a module parameter.
682
683config SCx200_I2C_SDA
684 int "GPIO pin used for SDA"
685 depends on SCx200_I2C
686 default "13"
687 help
688 Enter the GPIO pin number used for the SSA signal. This value can
689 also be specified with a module parameter.
690
691config SCx200_ACB
692 tristate "Geode ACCESS.bus support"
693 depends on X86_32 && PCI
694 help
695 Enable the use of the ACCESS.bus controllers on the Geode SCx200 and
696 SC1100 processors and the CS5535 and CS5536 Geode companion devices.
697
698 If you don't know what to do here, say N.
699
700 This support is also available as a module. If so, the module
701 will be called scx200_acb.
707 702
708endmenu 703endmenu
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index e8c882a5ea66..97dbfa2107fe 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -2,57 +2,68 @@
2# Makefile for the i2c bus drivers. 2# Makefile for the i2c bus drivers.
3# 3#
4 4
5# PC SMBus host controller drivers
5obj-$(CONFIG_I2C_ALI1535) += i2c-ali1535.o 6obj-$(CONFIG_I2C_ALI1535) += i2c-ali1535.o
6obj-$(CONFIG_I2C_ALI1563) += i2c-ali1563.o 7obj-$(CONFIG_I2C_ALI1563) += i2c-ali1563.o
7obj-$(CONFIG_I2C_ALI15X3) += i2c-ali15x3.o 8obj-$(CONFIG_I2C_ALI15X3) += i2c-ali15x3.o
8obj-$(CONFIG_I2C_AMD756) += i2c-amd756.o 9obj-$(CONFIG_I2C_AMD756) += i2c-amd756.o
9obj-$(CONFIG_I2C_AMD756_S4882) += i2c-amd756-s4882.o 10obj-$(CONFIG_I2C_AMD756_S4882) += i2c-amd756-s4882.o
10obj-$(CONFIG_I2C_AMD8111) += i2c-amd8111.o 11obj-$(CONFIG_I2C_AMD8111) += i2c-amd8111.o
12obj-$(CONFIG_I2C_I801) += i2c-i801.o
13obj-$(CONFIG_I2C_ISCH) += i2c-isch.o
14obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o
15obj-$(CONFIG_I2C_NFORCE2_S4985) += i2c-nforce2-s4985.o
16obj-$(CONFIG_I2C_PIIX4) += i2c-piix4.o
17obj-$(CONFIG_I2C_SIS5595) += i2c-sis5595.o
18obj-$(CONFIG_I2C_SIS630) += i2c-sis630.o
19obj-$(CONFIG_I2C_SIS96X) += i2c-sis96x.o
20obj-$(CONFIG_I2C_VIA) += i2c-via.o
21obj-$(CONFIG_I2C_VIAPRO) += i2c-viapro.o
22
23# Mac SMBus host controller drivers
24obj-$(CONFIG_I2C_HYDRA) += i2c-hydra.o
25obj-$(CONFIG_I2C_POWERMAC) += i2c-powermac.o
26
27# Embebbed system I2C/SMBus host controller drivers
11obj-$(CONFIG_I2C_AT91) += i2c-at91.o 28obj-$(CONFIG_I2C_AT91) += i2c-at91.o
12obj-$(CONFIG_I2C_AU1550) += i2c-au1550.o 29obj-$(CONFIG_I2C_AU1550) += i2c-au1550.o
13obj-$(CONFIG_I2C_BLACKFIN_TWI) += i2c-bfin-twi.o 30obj-$(CONFIG_I2C_BLACKFIN_TWI) += i2c-bfin-twi.o
31obj-$(CONFIG_I2C_CPM) += i2c-cpm.o
14obj-$(CONFIG_I2C_DAVINCI) += i2c-davinci.o 32obj-$(CONFIG_I2C_DAVINCI) += i2c-davinci.o
15obj-$(CONFIG_I2C_ELEKTOR) += i2c-elektor.o
16obj-$(CONFIG_I2C_GPIO) += i2c-gpio.o 33obj-$(CONFIG_I2C_GPIO) += i2c-gpio.o
17obj-$(CONFIG_I2C_HYDRA) += i2c-hydra.o
18obj-$(CONFIG_I2C_I801) += i2c-i801.o
19obj-$(CONFIG_I2C_I810) += i2c-i810.o
20obj-$(CONFIG_I2C_IBM_IIC) += i2c-ibm_iic.o 34obj-$(CONFIG_I2C_IBM_IIC) += i2c-ibm_iic.o
21obj-$(CONFIG_I2C_IOP3XX) += i2c-iop3xx.o 35obj-$(CONFIG_I2C_IOP3XX) += i2c-iop3xx.o
22obj-$(CONFIG_I2C_IXP2000) += i2c-ixp2000.o 36obj-$(CONFIG_I2C_IXP2000) += i2c-ixp2000.o
23obj-$(CONFIG_I2C_POWERMAC) += i2c-powermac.o
24obj-$(CONFIG_I2C_MPC) += i2c-mpc.o 37obj-$(CONFIG_I2C_MPC) += i2c-mpc.o
25obj-$(CONFIG_I2C_MV64XXX) += i2c-mv64xxx.o 38obj-$(CONFIG_I2C_MV64XXX) += i2c-mv64xxx.o
26obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o
27obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o 39obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o
28obj-$(CONFIG_I2C_OMAP) += i2c-omap.o 40obj-$(CONFIG_I2C_OMAP) += i2c-omap.o
29obj-$(CONFIG_I2C_PARPORT) += i2c-parport.o
30obj-$(CONFIG_I2C_PARPORT_LIGHT) += i2c-parport-light.o
31obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o 41obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o
32obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o
33obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o
34obj-$(CONFIG_I2C_PIIX4) += i2c-piix4.o
35obj-$(CONFIG_I2C_PMCMSP) += i2c-pmcmsp.o
36obj-$(CONFIG_I2C_PNX) += i2c-pnx.o 42obj-$(CONFIG_I2C_PNX) += i2c-pnx.o
37obj-$(CONFIG_I2C_PROSAVAGE) += i2c-prosavage.o
38obj-$(CONFIG_I2C_PXA) += i2c-pxa.o 43obj-$(CONFIG_I2C_PXA) += i2c-pxa.o
39obj-$(CONFIG_I2C_S3C2410) += i2c-s3c2410.o 44obj-$(CONFIG_I2C_S3C2410) += i2c-s3c2410.o
40obj-$(CONFIG_I2C_SAVAGE4) += i2c-savage4.o
41obj-$(CONFIG_I2C_SH7760) += i2c-sh7760.o 45obj-$(CONFIG_I2C_SH7760) += i2c-sh7760.o
42obj-$(CONFIG_I2C_SH_MOBILE) += i2c-sh_mobile.o 46obj-$(CONFIG_I2C_SH_MOBILE) += i2c-sh_mobile.o
43obj-$(CONFIG_I2C_SIBYTE) += i2c-sibyte.o
44obj-$(CONFIG_I2C_SIMTEC) += i2c-simtec.o 47obj-$(CONFIG_I2C_SIMTEC) += i2c-simtec.o
45obj-$(CONFIG_I2C_SIS5595) += i2c-sis5595.o 48obj-$(CONFIG_I2C_VERSATILE) += i2c-versatile.o
46obj-$(CONFIG_I2C_SIS630) += i2c-sis630.o 49
47obj-$(CONFIG_I2C_SIS96X) += i2c-sis96x.o 50# External I2C/SMBus adapter drivers
48obj-$(CONFIG_I2C_STUB) += i2c-stub.o 51obj-$(CONFIG_I2C_PARPORT) += i2c-parport.o
52obj-$(CONFIG_I2C_PARPORT_LIGHT) += i2c-parport-light.o
49obj-$(CONFIG_I2C_TAOS_EVM) += i2c-taos-evm.o 53obj-$(CONFIG_I2C_TAOS_EVM) += i2c-taos-evm.o
50obj-$(CONFIG_I2C_TINY_USB) += i2c-tiny-usb.o 54obj-$(CONFIG_I2C_TINY_USB) += i2c-tiny-usb.o
51obj-$(CONFIG_I2C_VERSATILE) += i2c-versatile.o 55
52obj-$(CONFIG_I2C_ACORN) += i2c-acorn.o 56# Graphics adapter I2C/DDC channel drivers
53obj-$(CONFIG_I2C_VIA) += i2c-via.o
54obj-$(CONFIG_I2C_VIAPRO) += i2c-viapro.o
55obj-$(CONFIG_I2C_VOODOO3) += i2c-voodoo3.o 57obj-$(CONFIG_I2C_VOODOO3) += i2c-voodoo3.o
58
59# Other I2C/SMBus bus drivers
60obj-$(CONFIG_I2C_ACORN) += i2c-acorn.o
61obj-$(CONFIG_I2C_ELEKTOR) += i2c-elektor.o
62obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o
63obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o
64obj-$(CONFIG_I2C_PMCMSP) += i2c-pmcmsp.o
65obj-$(CONFIG_I2C_SIBYTE) += i2c-sibyte.o
66obj-$(CONFIG_I2C_STUB) += i2c-stub.o
56obj-$(CONFIG_SCx200_ACB) += scx200_acb.o 67obj-$(CONFIG_SCx200_ACB) += scx200_acb.o
57obj-$(CONFIG_SCx200_I2C) += scx200_i2c.o 68obj-$(CONFIG_SCx200_I2C) += scx200_i2c.o
58 69
diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c
index f14372ac2fc5..9cead9b9458e 100644
--- a/drivers/i2c/busses/i2c-ali1535.c
+++ b/drivers/i2c/busses/i2c-ali1535.c
@@ -1,6 +1,4 @@
1/* 1/*
2 i2c-ali1535.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (c) 2000 Frodo Looijaard <frodol@dds.nl>, 2 Copyright (c) 2000 Frodo Looijaard <frodol@dds.nl>,
5 Philip Edelbrock <phil@netroedge.com>, 3 Philip Edelbrock <phil@netroedge.com>,
6 Mark D. Studebaker <mdsxyz123@yahoo.com>, 4 Mark D. Studebaker <mdsxyz123@yahoo.com>,
@@ -61,6 +59,7 @@
61#include <linux/ioport.h> 59#include <linux/ioport.h>
62#include <linux/i2c.h> 60#include <linux/i2c.h>
63#include <linux/init.h> 61#include <linux/init.h>
62#include <linux/acpi.h>
64#include <asm/io.h> 63#include <asm/io.h>
65 64
66 65
@@ -159,6 +158,11 @@ static int ali1535_setup(struct pci_dev *dev)
159 goto exit; 158 goto exit;
160 } 159 }
161 160
161 retval = acpi_check_region(ali1535_smba, ALI1535_SMB_IOSIZE,
162 ali1535_driver.name);
163 if (retval)
164 goto exit;
165
162 if (!request_region(ali1535_smba, ALI1535_SMB_IOSIZE, 166 if (!request_region(ali1535_smba, ALI1535_SMB_IOSIZE,
163 ali1535_driver.name)) { 167 ali1535_driver.name)) {
164 dev_err(&dev->dev, "ALI1535_smb region 0x%x already in use!\n", 168 dev_err(&dev->dev, "ALI1535_smb region 0x%x already in use!\n",
@@ -259,7 +263,7 @@ static int ali1535_transaction(struct i2c_adapter *adap)
259 dev_err(&adap->dev, 263 dev_err(&adap->dev,
260 "SMBus reset failed! (0x%02x) - controller or " 264 "SMBus reset failed! (0x%02x) - controller or "
261 "device on bus is probably hung\n", temp); 265 "device on bus is probably hung\n", temp);
262 return -1; 266 return -EBUSY;
263 } 267 }
264 } else { 268 } else {
265 /* check and clear done bit */ 269 /* check and clear done bit */
@@ -281,12 +285,12 @@ static int ali1535_transaction(struct i2c_adapter *adap)
281 285
282 /* If the SMBus is still busy, we give up */ 286 /* If the SMBus is still busy, we give up */
283 if (timeout >= MAX_TIMEOUT) { 287 if (timeout >= MAX_TIMEOUT) {
284 result = -1; 288 result = -ETIMEDOUT;
285 dev_err(&adap->dev, "SMBus Timeout!\n"); 289 dev_err(&adap->dev, "SMBus Timeout!\n");
286 } 290 }
287 291
288 if (temp & ALI1535_STS_FAIL) { 292 if (temp & ALI1535_STS_FAIL) {
289 result = -1; 293 result = -EIO;
290 dev_dbg(&adap->dev, "Error: Failed bus transaction\n"); 294 dev_dbg(&adap->dev, "Error: Failed bus transaction\n");
291 } 295 }
292 296
@@ -295,7 +299,7 @@ static int ali1535_transaction(struct i2c_adapter *adap)
295 * do a printk. This means that bus collisions go unreported. 299 * do a printk. This means that bus collisions go unreported.
296 */ 300 */
297 if (temp & ALI1535_STS_BUSERR) { 301 if (temp & ALI1535_STS_BUSERR) {
298 result = -1; 302 result = -ENXIO;
299 dev_dbg(&adap->dev, 303 dev_dbg(&adap->dev,
300 "Error: no response or bus collision ADD=%02x\n", 304 "Error: no response or bus collision ADD=%02x\n",
301 inb_p(SMBHSTADD)); 305 inb_p(SMBHSTADD));
@@ -303,13 +307,13 @@ static int ali1535_transaction(struct i2c_adapter *adap)
303 307
304 /* haven't ever seen this */ 308 /* haven't ever seen this */
305 if (temp & ALI1535_STS_DEV) { 309 if (temp & ALI1535_STS_DEV) {
306 result = -1; 310 result = -EIO;
307 dev_err(&adap->dev, "Error: device error\n"); 311 dev_err(&adap->dev, "Error: device error\n");
308 } 312 }
309 313
310 /* check to see if the "command complete" indication is set */ 314 /* check to see if the "command complete" indication is set */
311 if (!(temp & ALI1535_STS_DONE)) { 315 if (!(temp & ALI1535_STS_DONE)) {
312 result = -1; 316 result = -ETIMEDOUT;
313 dev_err(&adap->dev, "Error: command never completed\n"); 317 dev_err(&adap->dev, "Error: command never completed\n");
314 } 318 }
315 319
@@ -332,7 +336,7 @@ static int ali1535_transaction(struct i2c_adapter *adap)
332 return result; 336 return result;
333} 337}
334 338
335/* Return -1 on error. */ 339/* Return negative errno on error. */
336static s32 ali1535_access(struct i2c_adapter *adap, u16 addr, 340static s32 ali1535_access(struct i2c_adapter *adap, u16 addr,
337 unsigned short flags, char read_write, u8 command, 341 unsigned short flags, char read_write, u8 command,
338 int size, union i2c_smbus_data *data) 342 int size, union i2c_smbus_data *data)
@@ -357,10 +361,6 @@ static s32 ali1535_access(struct i2c_adapter *adap, u16 addr,
357 outb_p(0xFF, SMBHSTSTS); 361 outb_p(0xFF, SMBHSTSTS);
358 362
359 switch (size) { 363 switch (size) {
360 case I2C_SMBUS_PROC_CALL:
361 dev_err(&adap->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
362 result = -1;
363 goto EXIT;
364 case I2C_SMBUS_QUICK: 364 case I2C_SMBUS_QUICK:
365 outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), 365 outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
366 SMBHSTADD); 366 SMBHSTADD);
@@ -418,13 +418,15 @@ static s32 ali1535_access(struct i2c_adapter *adap, u16 addr,
418 outb_p(data->block[i], SMBBLKDAT); 418 outb_p(data->block[i], SMBBLKDAT);
419 } 419 }
420 break; 420 break;
421 default:
422 dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
423 result = -EOPNOTSUPP;
424 goto EXIT;
421 } 425 }
422 426
423 if (ali1535_transaction(adap)) { 427 result = ali1535_transaction(adap);
424 /* Error in transaction */ 428 if (result)
425 result = -1;
426 goto EXIT; 429 goto EXIT;
427 }
428 430
429 if ((read_write == I2C_SMBUS_WRITE) || (size == ALI1535_QUICK)) { 431 if ((read_write == I2C_SMBUS_WRITE) || (size == ALI1535_QUICK)) {
430 result = 0; 432 result = 0;
@@ -475,7 +477,7 @@ static const struct i2c_algorithm smbus_algorithm = {
475static struct i2c_adapter ali1535_adapter = { 477static struct i2c_adapter ali1535_adapter = {
476 .owner = THIS_MODULE, 478 .owner = THIS_MODULE,
477 .id = I2C_HW_SMBUS_ALI1535, 479 .id = I2C_HW_SMBUS_ALI1535,
478 .class = I2C_CLASS_HWMON, 480 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
479 .algo = &smbus_algorithm, 481 .algo = &smbus_algorithm,
480}; 482};
481 483
diff --git a/drivers/i2c/busses/i2c-ali1563.c b/drivers/i2c/busses/i2c-ali1563.c
index 6b68074e518a..fc3e5b026423 100644
--- a/drivers/i2c/busses/i2c-ali1563.c
+++ b/drivers/i2c/busses/i2c-ali1563.c
@@ -21,6 +21,7 @@
21#include <linux/i2c.h> 21#include <linux/i2c.h>
22#include <linux/pci.h> 22#include <linux/pci.h>
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/acpi.h>
24 25
25#define ALI1563_MAX_TIMEOUT 500 26#define ALI1563_MAX_TIMEOUT 500
26#define ALI1563_SMBBA 0x80 27#define ALI1563_SMBBA 0x80
@@ -67,6 +68,7 @@ static int ali1563_transaction(struct i2c_adapter * a, int size)
67{ 68{
68 u32 data; 69 u32 data;
69 int timeout; 70 int timeout;
71 int status = -EIO;
70 72
71 dev_dbg(&a->dev, "Transaction (pre): STS=%02x, CNTL1=%02x, " 73 dev_dbg(&a->dev, "Transaction (pre): STS=%02x, CNTL1=%02x, "
72 "CNTL2=%02x, CMD=%02x, ADD=%02x, DAT0=%02x, DAT1=%02x\n", 74 "CNTL2=%02x, CMD=%02x, ADD=%02x, DAT0=%02x, DAT1=%02x\n",
@@ -103,13 +105,15 @@ static int ali1563_transaction(struct i2c_adapter * a, int size)
103 /* Issue 'kill' to host controller */ 105 /* Issue 'kill' to host controller */
104 outb_p(HST_CNTL2_KILL,SMB_HST_CNTL2); 106 outb_p(HST_CNTL2_KILL,SMB_HST_CNTL2);
105 data = inb_p(SMB_HST_STS); 107 data = inb_p(SMB_HST_STS);
108 status = -ETIMEDOUT;
106 } 109 }
107 110
108 /* device error - no response, ignore the autodetection case */ 111 /* device error - no response, ignore the autodetection case */
109 if ((data & HST_STS_DEVERR) && (size != HST_CNTL2_QUICK)) { 112 if (data & HST_STS_DEVERR) {
110 dev_err(&a->dev, "Device error!\n"); 113 if (size != HST_CNTL2_QUICK)
114 dev_err(&a->dev, "Device error!\n");
115 status = -ENXIO;
111 } 116 }
112
113 /* bus collision */ 117 /* bus collision */
114 if (data & HST_STS_BUSERR) { 118 if (data & HST_STS_BUSERR) {
115 dev_err(&a->dev, "Bus collision!\n"); 119 dev_err(&a->dev, "Bus collision!\n");
@@ -122,13 +126,14 @@ static int ali1563_transaction(struct i2c_adapter * a, int size)
122 outb_p(0x0,SMB_HST_CNTL2); 126 outb_p(0x0,SMB_HST_CNTL2);
123 } 127 }
124 128
125 return -1; 129 return status;
126} 130}
127 131
128static int ali1563_block_start(struct i2c_adapter * a) 132static int ali1563_block_start(struct i2c_adapter * a)
129{ 133{
130 u32 data; 134 u32 data;
131 int timeout; 135 int timeout;
136 int status = -EIO;
132 137
133 dev_dbg(&a->dev, "Block (pre): STS=%02x, CNTL1=%02x, " 138 dev_dbg(&a->dev, "Block (pre): STS=%02x, CNTL1=%02x, "
134 "CNTL2=%02x, CMD=%02x, ADD=%02x, DAT0=%02x, DAT1=%02x\n", 139 "CNTL2=%02x, CMD=%02x, ADD=%02x, DAT0=%02x, DAT1=%02x\n",
@@ -164,13 +169,20 @@ static int ali1563_block_start(struct i2c_adapter * a)
164 169
165 if (timeout && !(data & HST_STS_BAD)) 170 if (timeout && !(data & HST_STS_BAD))
166 return 0; 171 return 0;
172
173 if (timeout == 0)
174 status = -ETIMEDOUT;
175
176 if (data & HST_STS_DEVERR)
177 status = -ENXIO;
178
167 dev_err(&a->dev, "SMBus Error: %s%s%s%s%s\n", 179 dev_err(&a->dev, "SMBus Error: %s%s%s%s%s\n",
168 timeout ? "Timeout " : "", 180 timeout ? "" : "Timeout ",
169 data & HST_STS_FAIL ? "Transaction Failed " : "", 181 data & HST_STS_FAIL ? "Transaction Failed " : "",
170 data & HST_STS_BUSERR ? "No response or Bus Collision " : "", 182 data & HST_STS_BUSERR ? "No response or Bus Collision " : "",
171 data & HST_STS_DEVERR ? "Device Error " : "", 183 data & HST_STS_DEVERR ? "Device Error " : "",
172 !(data & HST_STS_DONE) ? "Transaction Never Finished " : ""); 184 !(data & HST_STS_DONE) ? "Transaction Never Finished " : "");
173 return -1; 185 return status;
174} 186}
175 187
176static int ali1563_block(struct i2c_adapter * a, union i2c_smbus_data * data, u8 rw) 188static int ali1563_block(struct i2c_adapter * a, union i2c_smbus_data * data, u8 rw)
@@ -235,10 +247,6 @@ static s32 ali1563_access(struct i2c_adapter * a, u16 addr,
235 247
236 /* Map the size to what the chip understands */ 248 /* Map the size to what the chip understands */
237 switch (size) { 249 switch (size) {
238 case I2C_SMBUS_PROC_CALL:
239 dev_err(&a->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
240 error = -EINVAL;
241 break;
242 case I2C_SMBUS_QUICK: 250 case I2C_SMBUS_QUICK:
243 size = HST_CNTL2_QUICK; 251 size = HST_CNTL2_QUICK;
244 break; 252 break;
@@ -254,6 +262,10 @@ static s32 ali1563_access(struct i2c_adapter * a, u16 addr,
254 case I2C_SMBUS_BLOCK_DATA: 262 case I2C_SMBUS_BLOCK_DATA:
255 size = HST_CNTL2_BLOCK; 263 size = HST_CNTL2_BLOCK;
256 break; 264 break;
265 default:
266 dev_warn(&a->dev, "Unsupported transaction %d\n", size);
267 error = -EOPNOTSUPP;
268 goto Done;
257 } 269 }
258 270
259 outb_p(((addr & 0x7f) << 1) | (rw & 0x01), SMB_HST_ADD); 271 outb_p(((addr & 0x7f) << 1) | (rw & 0x01), SMB_HST_ADD);
@@ -345,6 +357,10 @@ static int __devinit ali1563_setup(struct pci_dev * dev)
345 } 357 }
346 } 358 }
347 359
360 if (acpi_check_region(ali1563_smba, ALI1563_SMB_IOSIZE,
361 ali1563_pci_driver.name))
362 goto Err;
363
348 if (!request_region(ali1563_smba, ALI1563_SMB_IOSIZE, 364 if (!request_region(ali1563_smba, ALI1563_SMB_IOSIZE,
349 ali1563_pci_driver.name)) { 365 ali1563_pci_driver.name)) {
350 dev_err(&dev->dev, "Could not allocate I/O space at 0x%04x\n", 366 dev_err(&dev->dev, "Could not allocate I/O space at 0x%04x\n",
@@ -371,7 +387,7 @@ static const struct i2c_algorithm ali1563_algorithm = {
371static struct i2c_adapter ali1563_adapter = { 387static struct i2c_adapter ali1563_adapter = {
372 .owner = THIS_MODULE, 388 .owner = THIS_MODULE,
373 .id = I2C_HW_SMBUS_ALI1563, 389 .id = I2C_HW_SMBUS_ALI1563,
374 .class = I2C_CLASS_HWMON, 390 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
375 .algo = &ali1563_algorithm, 391 .algo = &ali1563_algorithm,
376}; 392};
377 393
diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c
index 93bf87d70961..234fdde7d40e 100644
--- a/drivers/i2c/busses/i2c-ali15x3.c
+++ b/drivers/i2c/busses/i2c-ali15x3.c
@@ -1,6 +1,4 @@
1/* 1/*
2 ali15x3.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl> and 2 Copyright (c) 1999 Frodo Looijaard <frodol@dds.nl> and
5 Philip Edelbrock <phil@netroedge.com> and 3 Philip Edelbrock <phil@netroedge.com> and
6 Mark D. Studebaker <mdsxyz123@yahoo.com> 4 Mark D. Studebaker <mdsxyz123@yahoo.com>
@@ -68,6 +66,7 @@
68#include <linux/delay.h> 66#include <linux/delay.h>
69#include <linux/i2c.h> 67#include <linux/i2c.h>
70#include <linux/init.h> 68#include <linux/init.h>
69#include <linux/acpi.h>
71#include <asm/io.h> 70#include <asm/io.h>
72 71
73/* ALI15X3 SMBus address offsets */ 72/* ALI15X3 SMBus address offsets */
@@ -166,6 +165,10 @@ static int ali15x3_setup(struct pci_dev *ALI15X3_dev)
166 if(force_addr) 165 if(force_addr)
167 ali15x3_smba = force_addr & ~(ALI15X3_SMB_IOSIZE - 1); 166 ali15x3_smba = force_addr & ~(ALI15X3_SMB_IOSIZE - 1);
168 167
168 if (acpi_check_region(ali15x3_smba, ALI15X3_SMB_IOSIZE,
169 ali15x3_driver.name))
170 return -EBUSY;
171
169 if (!request_region(ali15x3_smba, ALI15X3_SMB_IOSIZE, 172 if (!request_region(ali15x3_smba, ALI15X3_SMB_IOSIZE,
170 ali15x3_driver.name)) { 173 ali15x3_driver.name)) {
171 dev_err(&ALI15X3_dev->dev, 174 dev_err(&ALI15X3_dev->dev,
@@ -282,7 +285,7 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
282 dev_err(&adap->dev, "SMBus reset failed! (0x%02x) - " 285 dev_err(&adap->dev, "SMBus reset failed! (0x%02x) - "
283 "controller or device on bus is probably hung\n", 286 "controller or device on bus is probably hung\n",
284 temp); 287 temp);
285 return -1; 288 return -EBUSY;
286 } 289 }
287 } else { 290 } else {
288 /* check and clear done bit */ 291 /* check and clear done bit */
@@ -304,12 +307,12 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
304 307
305 /* If the SMBus is still busy, we give up */ 308 /* If the SMBus is still busy, we give up */
306 if (timeout >= MAX_TIMEOUT) { 309 if (timeout >= MAX_TIMEOUT) {
307 result = -1; 310 result = -ETIMEDOUT;
308 dev_err(&adap->dev, "SMBus Timeout!\n"); 311 dev_err(&adap->dev, "SMBus Timeout!\n");
309 } 312 }
310 313
311 if (temp & ALI15X3_STS_TERM) { 314 if (temp & ALI15X3_STS_TERM) {
312 result = -1; 315 result = -EIO;
313 dev_dbg(&adap->dev, "Error: Failed bus transaction\n"); 316 dev_dbg(&adap->dev, "Error: Failed bus transaction\n");
314 } 317 }
315 318
@@ -320,7 +323,7 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
320 This means that bus collisions go unreported. 323 This means that bus collisions go unreported.
321 */ 324 */
322 if (temp & ALI15X3_STS_COLL) { 325 if (temp & ALI15X3_STS_COLL) {
323 result = -1; 326 result = -ENXIO;
324 dev_dbg(&adap->dev, 327 dev_dbg(&adap->dev,
325 "Error: no response or bus collision ADD=%02x\n", 328 "Error: no response or bus collision ADD=%02x\n",
326 inb_p(SMBHSTADD)); 329 inb_p(SMBHSTADD));
@@ -328,7 +331,7 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
328 331
329 /* haven't ever seen this */ 332 /* haven't ever seen this */
330 if (temp & ALI15X3_STS_DEV) { 333 if (temp & ALI15X3_STS_DEV) {
331 result = -1; 334 result = -EIO;
332 dev_err(&adap->dev, "Error: device error\n"); 335 dev_err(&adap->dev, "Error: device error\n");
333 } 336 }
334 dev_dbg(&adap->dev, "Transaction (post): STS=%02x, CNT=%02x, CMD=%02x, " 337 dev_dbg(&adap->dev, "Transaction (post): STS=%02x, CNT=%02x, CMD=%02x, "
@@ -338,7 +341,7 @@ static int ali15x3_transaction(struct i2c_adapter *adap)
338 return result; 341 return result;
339} 342}
340 343
341/* Return -1 on error. */ 344/* Return negative errno on error. */
342static s32 ali15x3_access(struct i2c_adapter * adap, u16 addr, 345static s32 ali15x3_access(struct i2c_adapter * adap, u16 addr,
343 unsigned short flags, char read_write, u8 command, 346 unsigned short flags, char read_write, u8 command,
344 int size, union i2c_smbus_data * data) 347 int size, union i2c_smbus_data * data)
@@ -362,9 +365,6 @@ static s32 ali15x3_access(struct i2c_adapter * adap, u16 addr,
362 } 365 }
363 366
364 switch (size) { 367 switch (size) {
365 case I2C_SMBUS_PROC_CALL:
366 dev_err(&adap->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
367 return -1;
368 case I2C_SMBUS_QUICK: 368 case I2C_SMBUS_QUICK:
369 outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), 369 outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
370 SMBHSTADD); 370 SMBHSTADD);
@@ -417,12 +417,16 @@ static s32 ali15x3_access(struct i2c_adapter * adap, u16 addr,
417 } 417 }
418 size = ALI15X3_BLOCK_DATA; 418 size = ALI15X3_BLOCK_DATA;
419 break; 419 break;
420 default:
421 dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
422 return -EOPNOTSUPP;
420 } 423 }
421 424
422 outb_p(size, SMBHSTCNT); /* output command */ 425 outb_p(size, SMBHSTCNT); /* output command */
423 426
424 if (ali15x3_transaction(adap)) /* Error in transaction */ 427 temp = ali15x3_transaction(adap);
425 return -1; 428 if (temp)
429 return temp;
426 430
427 if ((read_write == I2C_SMBUS_WRITE) || (size == ALI15X3_QUICK)) 431 if ((read_write == I2C_SMBUS_WRITE) || (size == ALI15X3_QUICK))
428 return 0; 432 return 0;
@@ -470,7 +474,7 @@ static const struct i2c_algorithm smbus_algorithm = {
470static struct i2c_adapter ali15x3_adapter = { 474static struct i2c_adapter ali15x3_adapter = {
471 .owner = THIS_MODULE, 475 .owner = THIS_MODULE,
472 .id = I2C_HW_SMBUS_ALI15X3, 476 .id = I2C_HW_SMBUS_ALI15X3,
473 .class = I2C_CLASS_HWMON, 477 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
474 .algo = &smbus_algorithm, 478 .algo = &smbus_algorithm,
475}; 479};
476 480
diff --git a/drivers/i2c/busses/i2c-amd756-s4882.c b/drivers/i2c/busses/i2c-amd756-s4882.c
index c38a0a112208..2f150e33c74c 100644
--- a/drivers/i2c/busses/i2c-amd756-s4882.c
+++ b/drivers/i2c/busses/i2c-amd756-s4882.c
@@ -58,7 +58,7 @@ static s32 amd756_access_virt0(struct i2c_adapter * adap, u16 addr,
58 /* We exclude the multiplexed addresses */ 58 /* We exclude the multiplexed addresses */
59 if (addr == 0x4c || (addr & 0xfc) == 0x50 || (addr & 0xfc) == 0x30 59 if (addr == 0x4c || (addr & 0xfc) == 0x50 || (addr & 0xfc) == 0x30
60 || addr == 0x18) 60 || addr == 0x18)
61 return -1; 61 return -ENXIO;
62 62
63 mutex_lock(&amd756_lock); 63 mutex_lock(&amd756_lock);
64 64
@@ -86,7 +86,7 @@ static inline s32 amd756_access_channel(struct i2c_adapter * adap, u16 addr,
86 86
87 /* We exclude the non-multiplexed addresses */ 87 /* We exclude the non-multiplexed addresses */
88 if (addr != 0x4c && (addr & 0xfc) != 0x50 && (addr & 0xfc) != 0x30) 88 if (addr != 0x4c && (addr & 0xfc) != 0x50 && (addr & 0xfc) != 0x30)
89 return -1; 89 return -ENXIO;
90 90
91 mutex_lock(&amd756_lock); 91 mutex_lock(&amd756_lock);
92 92
diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c
index 43508d61eb7c..1ea39254dac6 100644
--- a/drivers/i2c/busses/i2c-amd756.c
+++ b/drivers/i2c/busses/i2c-amd756.c
@@ -1,7 +1,4 @@
1/* 1/*
2 amd756.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4
5 Copyright (c) 1999-2002 Merlin Hughes <merlin@merlin.org> 2 Copyright (c) 1999-2002 Merlin Hughes <merlin@merlin.org>
6 3
7 Shamelessly ripped from i2c-piix4.c: 4 Shamelessly ripped from i2c-piix4.c:
@@ -45,6 +42,7 @@
45#include <linux/ioport.h> 42#include <linux/ioport.h>
46#include <linux/i2c.h> 43#include <linux/i2c.h>
47#include <linux/init.h> 44#include <linux/init.h>
45#include <linux/acpi.h>
48#include <asm/io.h> 46#include <asm/io.h>
49 47
50/* AMD756 SMBus address offsets */ 48/* AMD756 SMBus address offsets */
@@ -151,17 +149,17 @@ static int amd756_transaction(struct i2c_adapter *adap)
151 } 149 }
152 150
153 if (temp & GS_PRERR_STS) { 151 if (temp & GS_PRERR_STS) {
154 result = -1; 152 result = -ENXIO;
155 dev_dbg(&adap->dev, "SMBus Protocol error (no response)!\n"); 153 dev_dbg(&adap->dev, "SMBus Protocol error (no response)!\n");
156 } 154 }
157 155
158 if (temp & GS_COL_STS) { 156 if (temp & GS_COL_STS) {
159 result = -1; 157 result = -EIO;
160 dev_warn(&adap->dev, "SMBus collision!\n"); 158 dev_warn(&adap->dev, "SMBus collision!\n");
161 } 159 }
162 160
163 if (temp & GS_TO_STS) { 161 if (temp & GS_TO_STS) {
164 result = -1; 162 result = -ETIMEDOUT;
165 dev_dbg(&adap->dev, "SMBus protocol timeout!\n"); 163 dev_dbg(&adap->dev, "SMBus protocol timeout!\n");
166 } 164 }
167 165
@@ -189,22 +187,18 @@ static int amd756_transaction(struct i2c_adapter *adap)
189 outw_p(inw(SMB_GLOBAL_ENABLE) | GE_ABORT, SMB_GLOBAL_ENABLE); 187 outw_p(inw(SMB_GLOBAL_ENABLE) | GE_ABORT, SMB_GLOBAL_ENABLE);
190 msleep(100); 188 msleep(100);
191 outw_p(GS_CLEAR_STS, SMB_GLOBAL_STATUS); 189 outw_p(GS_CLEAR_STS, SMB_GLOBAL_STATUS);
192 return -1; 190 return -EIO;
193} 191}
194 192
195/* Return -1 on error. */ 193/* Return negative errno on error. */
196static s32 amd756_access(struct i2c_adapter * adap, u16 addr, 194static s32 amd756_access(struct i2c_adapter * adap, u16 addr,
197 unsigned short flags, char read_write, 195 unsigned short flags, char read_write,
198 u8 command, int size, union i2c_smbus_data * data) 196 u8 command, int size, union i2c_smbus_data * data)
199{ 197{
200 int i, len; 198 int i, len;
199 int status;
201 200
202 /** TODO: Should I supporte the 10-bit transfers? */
203 switch (size) { 201 switch (size) {
204 case I2C_SMBUS_PROC_CALL:
205 dev_dbg(&adap->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
206 /* TODO: Well... It is supported, I'm just not sure what to do here... */
207 return -1;
208 case I2C_SMBUS_QUICK: 202 case I2C_SMBUS_QUICK:
209 outw_p(((addr & 0x7f) << 1) | (read_write & 0x01), 203 outw_p(((addr & 0x7f) << 1) | (read_write & 0x01),
210 SMB_HOST_ADDRESS); 204 SMB_HOST_ADDRESS);
@@ -251,13 +245,17 @@ static s32 amd756_access(struct i2c_adapter * adap, u16 addr,
251 } 245 }
252 size = AMD756_BLOCK_DATA; 246 size = AMD756_BLOCK_DATA;
253 break; 247 break;
248 default:
249 dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
250 return -EOPNOTSUPP;
254 } 251 }
255 252
256 /* How about enabling interrupts... */ 253 /* How about enabling interrupts... */
257 outw_p(size & GE_CYC_TYPE_MASK, SMB_GLOBAL_ENABLE); 254 outw_p(size & GE_CYC_TYPE_MASK, SMB_GLOBAL_ENABLE);
258 255
259 if (amd756_transaction(adap)) /* Error in transaction */ 256 status = amd756_transaction(adap);
260 return -1; 257 if (status)
258 return status;
261 259
262 if ((read_write == I2C_SMBUS_WRITE) || (size == AMD756_QUICK)) 260 if ((read_write == I2C_SMBUS_WRITE) || (size == AMD756_QUICK))
263 return 0; 261 return 0;
@@ -301,7 +299,7 @@ static const struct i2c_algorithm smbus_algorithm = {
301struct i2c_adapter amd756_smbus = { 299struct i2c_adapter amd756_smbus = {
302 .owner = THIS_MODULE, 300 .owner = THIS_MODULE,
303 .id = I2C_HW_SMBUS_AMD756, 301 .id = I2C_HW_SMBUS_AMD756,
304 .class = I2C_CLASS_HWMON, 302 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
305 .algo = &smbus_algorithm, 303 .algo = &smbus_algorithm,
306}; 304};
307 305
@@ -368,6 +366,11 @@ static int __devinit amd756_probe(struct pci_dev *pdev,
368 amd756_ioport += SMB_ADDR_OFFSET; 366 amd756_ioport += SMB_ADDR_OFFSET;
369 } 367 }
370 368
369 error = acpi_check_region(amd756_ioport, SMB_IOSIZE,
370 amd756_driver.name);
371 if (error)
372 return error;
373
371 if (!request_region(amd756_ioport, SMB_IOSIZE, amd756_driver.name)) { 374 if (!request_region(amd756_ioport, SMB_IOSIZE, amd756_driver.name)) {
372 dev_err(&pdev->dev, "SMB region 0x%x already in use!\n", 375 dev_err(&pdev->dev, "SMB region 0x%x already in use!\n",
373 amd756_ioport); 376 amd756_ioport);
diff --git a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c
index 5d1a27ef2504..3972208876b3 100644
--- a/drivers/i2c/busses/i2c-amd8111.c
+++ b/drivers/i2c/busses/i2c-amd8111.c
@@ -16,6 +16,7 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/i2c.h> 17#include <linux/i2c.h>
18#include <linux/delay.h> 18#include <linux/delay.h>
19#include <linux/acpi.h>
19#include <asm/io.h> 20#include <asm/io.h>
20 21
21MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
@@ -77,7 +78,7 @@ static unsigned int amd_ec_wait_write(struct amd_smbus *smbus)
77 if (!timeout) { 78 if (!timeout) {
78 dev_warn(&smbus->dev->dev, 79 dev_warn(&smbus->dev->dev,
79 "Timeout while waiting for IBF to clear\n"); 80 "Timeout while waiting for IBF to clear\n");
80 return -1; 81 return -ETIMEDOUT;
81 } 82 }
82 83
83 return 0; 84 return 0;
@@ -93,7 +94,7 @@ static unsigned int amd_ec_wait_read(struct amd_smbus *smbus)
93 if (!timeout) { 94 if (!timeout) {
94 dev_warn(&smbus->dev->dev, 95 dev_warn(&smbus->dev->dev,
95 "Timeout while waiting for OBF to set\n"); 96 "Timeout while waiting for OBF to set\n");
96 return -1; 97 return -ETIMEDOUT;
97 } 98 }
98 99
99 return 0; 100 return 0;
@@ -102,16 +103,21 @@ static unsigned int amd_ec_wait_read(struct amd_smbus *smbus)
102static unsigned int amd_ec_read(struct amd_smbus *smbus, unsigned char address, 103static unsigned int amd_ec_read(struct amd_smbus *smbus, unsigned char address,
103 unsigned char *data) 104 unsigned char *data)
104{ 105{
105 if (amd_ec_wait_write(smbus)) 106 int status;
106 return -1; 107
108 status = amd_ec_wait_write(smbus);
109 if (status)
110 return status;
107 outb(AMD_EC_CMD_RD, smbus->base + AMD_EC_CMD); 111 outb(AMD_EC_CMD_RD, smbus->base + AMD_EC_CMD);
108 112
109 if (amd_ec_wait_write(smbus)) 113 status = amd_ec_wait_write(smbus);
110 return -1; 114 if (status)
115 return status;
111 outb(address, smbus->base + AMD_EC_DATA); 116 outb(address, smbus->base + AMD_EC_DATA);
112 117
113 if (amd_ec_wait_read(smbus)) 118 status = amd_ec_wait_read(smbus);
114 return -1; 119 if (status)
120 return status;
115 *data = inb(smbus->base + AMD_EC_DATA); 121 *data = inb(smbus->base + AMD_EC_DATA);
116 122
117 return 0; 123 return 0;
@@ -120,16 +126,21 @@ static unsigned int amd_ec_read(struct amd_smbus *smbus, unsigned char address,
120static unsigned int amd_ec_write(struct amd_smbus *smbus, unsigned char address, 126static unsigned int amd_ec_write(struct amd_smbus *smbus, unsigned char address,
121 unsigned char data) 127 unsigned char data)
122{ 128{
123 if (amd_ec_wait_write(smbus)) 129 int status;
124 return -1; 130
131 status = amd_ec_wait_write(smbus);
132 if (status)
133 return status;
125 outb(AMD_EC_CMD_WR, smbus->base + AMD_EC_CMD); 134 outb(AMD_EC_CMD_WR, smbus->base + AMD_EC_CMD);
126 135
127 if (amd_ec_wait_write(smbus)) 136 status = amd_ec_wait_write(smbus);
128 return -1; 137 if (status)
138 return status;
129 outb(address, smbus->base + AMD_EC_DATA); 139 outb(address, smbus->base + AMD_EC_DATA);
130 140
131 if (amd_ec_wait_write(smbus)) 141 status = amd_ec_wait_write(smbus);
132 return -1; 142 if (status)
143 return status;
133 outb(data, smbus->base + AMD_EC_DATA); 144 outb(data, smbus->base + AMD_EC_DATA);
134 145
135 return 0; 146 return 0;
@@ -267,12 +278,17 @@ static s32 amd8111_access(struct i2c_adapter * adap, u16 addr,
267 278
268 default: 279 default:
269 dev_warn(&adap->dev, "Unsupported transaction %d\n", size); 280 dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
270 return -1; 281 return -EOPNOTSUPP;
271 } 282 }
272 283
273 amd_ec_write(smbus, AMD_SMB_ADDR, addr << 1); 284 amd_ec_write(smbus, AMD_SMB_ADDR, addr << 1);
274 amd_ec_write(smbus, AMD_SMB_PRTCL, protocol); 285 amd_ec_write(smbus, AMD_SMB_PRTCL, protocol);
275 286
287 /* FIXME this discards status from ec_read(); so temp[0] will
288 * hold stack garbage ... the rest of this routine will act
289 * nonsensically. Ignored ec_write() status might explain
290 * some such failures...
291 */
276 amd_ec_read(smbus, AMD_SMB_STS, temp + 0); 292 amd_ec_read(smbus, AMD_SMB_STS, temp + 0);
277 293
278 if (~temp[0] & AMD_SMB_STS_DONE) { 294 if (~temp[0] & AMD_SMB_STS_DONE) {
@@ -286,7 +302,7 @@ static s32 amd8111_access(struct i2c_adapter * adap, u16 addr,
286 } 302 }
287 303
288 if ((~temp[0] & AMD_SMB_STS_DONE) || (temp[0] & AMD_SMB_STS_STATUS)) 304 if ((~temp[0] & AMD_SMB_STS_DONE) || (temp[0] & AMD_SMB_STS_STATUS))
289 return -1; 305 return -EIO;
290 306
291 if (read_write == I2C_SMBUS_WRITE) 307 if (read_write == I2C_SMBUS_WRITE)
292 return 0; 308 return 0;
@@ -359,6 +375,10 @@ static int __devinit amd8111_probe(struct pci_dev *dev,
359 smbus->base = pci_resource_start(dev, 0); 375 smbus->base = pci_resource_start(dev, 0);
360 smbus->size = pci_resource_len(dev, 0); 376 smbus->size = pci_resource_len(dev, 0);
361 377
378 error = acpi_check_resource_conflict(&dev->resource[0]);
379 if (error)
380 goto out_kfree;
381
362 if (!request_region(smbus->base, smbus->size, amd8111_driver.name)) { 382 if (!request_region(smbus->base, smbus->size, amd8111_driver.name)) {
363 error = -EBUSY; 383 error = -EBUSY;
364 goto out_kfree; 384 goto out_kfree;
@@ -368,7 +388,7 @@ static int __devinit amd8111_probe(struct pci_dev *dev,
368 snprintf(smbus->adapter.name, sizeof(smbus->adapter.name), 388 snprintf(smbus->adapter.name, sizeof(smbus->adapter.name),
369 "SMBus2 AMD8111 adapter at %04x", smbus->base); 389 "SMBus2 AMD8111 adapter at %04x", smbus->base);
370 smbus->adapter.id = I2C_HW_SMBUS_AMD8111; 390 smbus->adapter.id = I2C_HW_SMBUS_AMD8111;
371 smbus->adapter.class = I2C_CLASS_HWMON; 391 smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
372 smbus->adapter.algo = &smbus_algorithm; 392 smbus->adapter.algo = &smbus_algorithm;
373 smbus->adapter.algo_data = smbus; 393 smbus->adapter.algo_data = smbus;
374 394
diff --git a/drivers/i2c/busses/i2c-au1550.c b/drivers/i2c/busses/i2c-au1550.c
index cae9dc89d88c..66a04c2c660f 100644
--- a/drivers/i2c/busses/i2c-au1550.c
+++ b/drivers/i2c/busses/i2c-au1550.c
@@ -269,9 +269,13 @@ static int
269au1550_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num) 269au1550_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num)
270{ 270{
271 struct i2c_au1550_data *adap = i2c_adap->algo_data; 271 struct i2c_au1550_data *adap = i2c_adap->algo_data;
272 volatile psc_smb_t *sp = (volatile psc_smb_t *)adap->psc_base;
272 struct i2c_msg *p; 273 struct i2c_msg *p;
273 int i, err = 0; 274 int i, err = 0;
274 275
276 sp->psc_ctrl = PSC_CTRL_ENABLE;
277 au_sync();
278
275 for (i = 0; !err && i < num; i++) { 279 for (i = 0; !err && i < num; i++) {
276 p = &msgs[i]; 280 p = &msgs[i];
277 err = do_address(adap, p->addr, p->flags & I2C_M_RD, 281 err = do_address(adap, p->addr, p->flags & I2C_M_RD,
@@ -288,6 +292,10 @@ au1550_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num)
288 */ 292 */
289 if (err == 0) 293 if (err == 0)
290 err = num; 294 err = num;
295
296 sp->psc_ctrl = PSC_CTRL_SUSPEND;
297 au_sync();
298
291 return err; 299 return err;
292} 300}
293 301
@@ -302,6 +310,61 @@ static const struct i2c_algorithm au1550_algo = {
302 .functionality = au1550_func, 310 .functionality = au1550_func,
303}; 311};
304 312
313static void i2c_au1550_setup(struct i2c_au1550_data *priv)
314{
315 volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
316 u32 stat;
317
318 sp->psc_ctrl = PSC_CTRL_DISABLE;
319 au_sync();
320 sp->psc_sel = PSC_SEL_PS_SMBUSMODE;
321 sp->psc_smbcfg = 0;
322 au_sync();
323 sp->psc_ctrl = PSC_CTRL_ENABLE;
324 au_sync();
325 do {
326 stat = sp->psc_smbstat;
327 au_sync();
328 } while ((stat & PSC_SMBSTAT_SR) == 0);
329
330 sp->psc_smbcfg = (PSC_SMBCFG_RT_FIFO8 | PSC_SMBCFG_TT_FIFO8 |
331 PSC_SMBCFG_DD_DISABLE);
332
333 /* Divide by 8 to get a 6.25 MHz clock. The later protocol
334 * timings are based on this clock.
335 */
336 sp->psc_smbcfg |= PSC_SMBCFG_SET_DIV(PSC_SMBCFG_DIV8);
337 sp->psc_smbmsk = PSC_SMBMSK_ALLMASK;
338 au_sync();
339
340 /* Set the protocol timer values. See Table 71 in the
341 * Au1550 Data Book for standard timing values.
342 */
343 sp->psc_smbtmr = PSC_SMBTMR_SET_TH(0) | PSC_SMBTMR_SET_PS(15) | \
344 PSC_SMBTMR_SET_PU(15) | PSC_SMBTMR_SET_SH(15) | \
345 PSC_SMBTMR_SET_SU(15) | PSC_SMBTMR_SET_CL(15) | \
346 PSC_SMBTMR_SET_CH(15);
347 au_sync();
348
349 sp->psc_smbcfg |= PSC_SMBCFG_DE_ENABLE;
350 do {
351 stat = sp->psc_smbstat;
352 au_sync();
353 } while ((stat & PSC_SMBSTAT_SR) == 0);
354
355 sp->psc_ctrl = PSC_CTRL_SUSPEND;
356 au_sync();
357}
358
359static void i2c_au1550_disable(struct i2c_au1550_data *priv)
360{
361 volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
362
363 sp->psc_smbcfg = 0;
364 sp->psc_ctrl = PSC_CTRL_DISABLE;
365 au_sync();
366}
367
305/* 368/*
306 * registering functions to load algorithms at runtime 369 * registering functions to load algorithms at runtime
307 * Prior to calling us, the 50MHz clock frequency and routing 370 * Prior to calling us, the 50MHz clock frequency and routing
@@ -311,9 +374,7 @@ static int __devinit
311i2c_au1550_probe(struct platform_device *pdev) 374i2c_au1550_probe(struct platform_device *pdev)
312{ 375{
313 struct i2c_au1550_data *priv; 376 struct i2c_au1550_data *priv;
314 volatile psc_smb_t *sp;
315 struct resource *r; 377 struct resource *r;
316 u32 stat;
317 int ret; 378 int ret;
318 379
319 r = platform_get_resource(pdev, IORESOURCE_MEM, 0); 380 r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -348,43 +409,7 @@ i2c_au1550_probe(struct platform_device *pdev)
348 409
349 /* Now, set up the PSC for SMBus PIO mode. 410 /* Now, set up the PSC for SMBus PIO mode.
350 */ 411 */
351 sp = (volatile psc_smb_t *)priv->psc_base; 412 i2c_au1550_setup(priv);
352 sp->psc_ctrl = PSC_CTRL_DISABLE;
353 au_sync();
354 sp->psc_sel = PSC_SEL_PS_SMBUSMODE;
355 sp->psc_smbcfg = 0;
356 au_sync();
357 sp->psc_ctrl = PSC_CTRL_ENABLE;
358 au_sync();
359 do {
360 stat = sp->psc_smbstat;
361 au_sync();
362 } while ((stat & PSC_SMBSTAT_SR) == 0);
363
364 sp->psc_smbcfg = (PSC_SMBCFG_RT_FIFO8 | PSC_SMBCFG_TT_FIFO8 |
365 PSC_SMBCFG_DD_DISABLE);
366
367 /* Divide by 8 to get a 6.25 MHz clock. The later protocol
368 * timings are based on this clock.
369 */
370 sp->psc_smbcfg |= PSC_SMBCFG_SET_DIV(PSC_SMBCFG_DIV8);
371 sp->psc_smbmsk = PSC_SMBMSK_ALLMASK;
372 au_sync();
373
374 /* Set the protocol timer values. See Table 71 in the
375 * Au1550 Data Book for standard timing values.
376 */
377 sp->psc_smbtmr = PSC_SMBTMR_SET_TH(0) | PSC_SMBTMR_SET_PS(15) | \
378 PSC_SMBTMR_SET_PU(15) | PSC_SMBTMR_SET_SH(15) | \
379 PSC_SMBTMR_SET_SU(15) | PSC_SMBTMR_SET_CL(15) | \
380 PSC_SMBTMR_SET_CH(15);
381 au_sync();
382
383 sp->psc_smbcfg |= PSC_SMBCFG_DE_ENABLE;
384 do {
385 stat = sp->psc_smbstat;
386 au_sync();
387 } while ((stat & PSC_SMBSTAT_DR) == 0);
388 413
389 ret = i2c_add_numbered_adapter(&priv->adap); 414 ret = i2c_add_numbered_adapter(&priv->adap);
390 if (ret == 0) { 415 if (ret == 0) {
@@ -392,10 +417,7 @@ i2c_au1550_probe(struct platform_device *pdev)
392 return 0; 417 return 0;
393 } 418 }
394 419
395 /* disable the PSC */ 420 i2c_au1550_disable(priv);
396 sp->psc_smbcfg = 0;
397 sp->psc_ctrl = PSC_CTRL_DISABLE;
398 au_sync();
399 421
400 release_resource(priv->ioarea); 422 release_resource(priv->ioarea);
401 kfree(priv->ioarea); 423 kfree(priv->ioarea);
@@ -409,27 +431,24 @@ static int __devexit
409i2c_au1550_remove(struct platform_device *pdev) 431i2c_au1550_remove(struct platform_device *pdev)
410{ 432{
411 struct i2c_au1550_data *priv = platform_get_drvdata(pdev); 433 struct i2c_au1550_data *priv = platform_get_drvdata(pdev);
412 volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
413 434
414 platform_set_drvdata(pdev, NULL); 435 platform_set_drvdata(pdev, NULL);
415 i2c_del_adapter(&priv->adap); 436 i2c_del_adapter(&priv->adap);
416 sp->psc_smbcfg = 0; 437 i2c_au1550_disable(priv);
417 sp->psc_ctrl = PSC_CTRL_DISABLE;
418 au_sync();
419 release_resource(priv->ioarea); 438 release_resource(priv->ioarea);
420 kfree(priv->ioarea); 439 kfree(priv->ioarea);
421 kfree(priv); 440 kfree(priv);
422 return 0; 441 return 0;
423} 442}
424 443
444#ifdef CONFIG_PM
425static int 445static int
426i2c_au1550_suspend(struct platform_device *pdev, pm_message_t state) 446i2c_au1550_suspend(struct platform_device *pdev, pm_message_t state)
427{ 447{
428 struct i2c_au1550_data *priv = platform_get_drvdata(pdev); 448 struct i2c_au1550_data *priv = platform_get_drvdata(pdev);
429 volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
430 449
431 sp->psc_ctrl = PSC_CTRL_SUSPEND; 450 i2c_au1550_disable(priv);
432 au_sync(); 451
433 return 0; 452 return 0;
434} 453}
435 454
@@ -437,14 +456,15 @@ static int
437i2c_au1550_resume(struct platform_device *pdev) 456i2c_au1550_resume(struct platform_device *pdev)
438{ 457{
439 struct i2c_au1550_data *priv = platform_get_drvdata(pdev); 458 struct i2c_au1550_data *priv = platform_get_drvdata(pdev);
440 volatile psc_smb_t *sp = (volatile psc_smb_t *)priv->psc_base;
441 459
442 sp->psc_ctrl = PSC_CTRL_ENABLE; 460 i2c_au1550_setup(priv);
443 au_sync(); 461
444 while (!(sp->psc_smbstat & PSC_SMBSTAT_SR))
445 au_sync();
446 return 0; 462 return 0;
447} 463}
464#else
465#define i2c_au1550_suspend NULL
466#define i2c_au1550_resume NULL
467#endif
448 468
449static struct platform_driver au1xpsc_smbus_driver = { 469static struct platform_driver au1xpsc_smbus_driver = {
450 .driver = { 470 .driver = {
diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c
new file mode 100644
index 000000000000..8164de1f4d72
--- /dev/null
+++ b/drivers/i2c/busses/i2c-cpm.c
@@ -0,0 +1,745 @@
1/*
2 * Freescale CPM1/CPM2 I2C interface.
3 * Copyright (c) 1999 Dan Malek (dmalek@jlc.net).
4 *
5 * moved into proper i2c interface;
6 * Brad Parker (brad@heeltoe.com)
7 *
8 * Parts from dbox2_i2c.c (cvs.tuxbox.org)
9 * (C) 2000-2001 Felix Domke (tmbinc@gmx.net), Gillem (htoa@gmx.net)
10 *
11 * (C) 2007 Montavista Software, Inc.
12 * Vitaly Bordug <vitb@kernel.crashing.org>
13 *
14 * Converted to of_platform_device. Renamed to i2c-cpm.c.
15 * (C) 2007,2008 Jochen Friedrich <jochen@scram.de>
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 */
31
32#include <linux/kernel.h>
33#include <linux/module.h>
34#include <linux/delay.h>
35#include <linux/slab.h>
36#include <linux/init.h>
37#include <linux/interrupt.h>
38#include <linux/errno.h>
39#include <linux/stddef.h>
40#include <linux/i2c.h>
41#include <linux/io.h>
42#include <linux/dma-mapping.h>
43#include <linux/of_device.h>
44#include <linux/of_platform.h>
45#include <linux/of_i2c.h>
46#include <sysdev/fsl_soc.h>
47#include <asm/cpm.h>
48
49/* Try to define this if you have an older CPU (earlier than rev D4) */
50/* However, better use a GPIO based bitbang driver in this case :/ */
51#undef I2C_CHIP_ERRATA
52
53#define CPM_MAX_READ 513
54#define CPM_MAXBD 4
55
56#define I2C_EB (0x10) /* Big endian mode */
57#define I2C_EB_CPM2 (0x30) /* Big endian mode, memory snoop */
58
59#define DPRAM_BASE ((u8 __iomem __force *)cpm_muram_addr(0))
60
/* I2C parameter RAM.
 * Layout is dictated by the CPM hardware.  A non-zero rpbase on CPM1
 * indicates a microcode relocation patch is active (see cpm_i2c_setup).
 */
struct i2c_ram {
	ushort  rbase;		/* Rx Buffer descriptor base address */
	ushort  tbase;		/* Tx Buffer descriptor base address */
	u_char  rfcr;		/* Rx function code */
	u_char  tfcr;		/* Tx function code */
	ushort  mrblr;		/* Max receive buffer length */
	uint    rstate;		/* Internal */
	uint    rdp;		/* Internal */
	ushort  rbptr;		/* Rx Buffer descriptor pointer */
	ushort  rbc;		/* Internal */
	uint    rxtmp;		/* Internal */
	uint    tstate;		/* Internal */
	uint    tdp;		/* Internal */
	ushort  tbptr;		/* Tx Buffer descriptor pointer */
	ushort  tbc;		/* Internal */
	uint    txtmp;		/* Internal */
	char    res1[4];	/* Reserved */
	ushort  rpbase;		/* Relocation pointer */
	char    res2[2];	/* Reserved */
};
82
83#define I2COM_START 0x80
84#define I2COM_MASTER 0x01
85#define I2CER_TXE 0x10
86#define I2CER_BUSY 0x04
87#define I2CER_TXB 0x02
88#define I2CER_RXB 0x01
89#define I2MOD_EN 0x01
90
/* I2C Registers.
 * All registers are byte-wide, each followed by 3 bytes of padding.
 */
struct i2c_reg {
	u8	i2mod;		/* mode register */
	u8	res1[3];
	u8	i2add;		/* slave address register */
	u8	res2[3];
	u8	i2brg;		/* baud rate generator divider */
	u8	res3[3];
	u8	i2com;		/* command register */
	u8	res4[3];
	u8	i2cer;		/* event register; written back to acknowledge */
	u8	res5[3];
	u8	i2cmr;		/* interrupt mask register */
};
105
/* Per-adapter driver state. */
struct cpm_i2c {
	char *base;
	struct of_device *ofdev;
	struct i2c_adapter adap;
	uint dp_addr;			/* muram offset of the BD rings */
	int version;			/* CPM1=1, CPM2=2 */
	int irq;
	int cp_command;			/* opcode from "fsl,cpm-command" */
	int freq;			/* bus frequency in Hz */
	struct i2c_reg __iomem *i2c_reg;	/* control/status registers */
	struct i2c_ram __iomem *i2c_ram;	/* parameter RAM */
	u16 i2c_addr;			/* muram offset of param RAM (if allocated) */
	wait_queue_head_t i2c_wait;	/* woken by the IRQ handler */
	cbd_t __iomem *tbase;		/* Tx buffer descriptor ring */
	cbd_t __iomem *rbase;		/* Rx buffer descriptor ring */
	u_char *txbuf[CPM_MAXBD];
	u_char *rxbuf[CPM_MAXBD];
	u32 txdma[CPM_MAXBD];		/* DMA addresses of txbuf[] */
	u32 rxdma[CPM_MAXBD];		/* DMA addresses of rxbuf[] */
};
126
127static irqreturn_t cpm_i2c_interrupt(int irq, void *dev_id)
128{
129 struct cpm_i2c *cpm;
130 struct i2c_reg __iomem *i2c_reg;
131 struct i2c_adapter *adap = dev_id;
132 int i;
133
134 cpm = i2c_get_adapdata(dev_id);
135 i2c_reg = cpm->i2c_reg;
136
137 /* Clear interrupt. */
138 i = in_8(&i2c_reg->i2cer);
139 out_8(&i2c_reg->i2cer, i);
140
141 dev_dbg(&adap->dev, "Interrupt: %x\n", i);
142
143 wake_up_interruptible(&cpm->i2c_wait);
144
145 return i ? IRQ_HANDLED : IRQ_NONE;
146}
147
/*
 * (Re)initialize the I2C parameter RAM: buffer descriptor bases,
 * function codes, max receive length, and all internal state fields.
 * Called from cpm_i2c_setup() before CPM_CR_INIT_TRX is issued.
 */
static void cpm_reset_i2c_params(struct cpm_i2c *cpm)
{
	struct i2c_ram __iomem *i2c_ram = cpm->i2c_ram;

	/* Set up the I2C parameters in the parameter ram. */
	/* BD bases are offsets into the dual-port RAM, not bus addresses. */
	out_be16(&i2c_ram->tbase, (u8 __iomem *)cpm->tbase - DPRAM_BASE);
	out_be16(&i2c_ram->rbase, (u8 __iomem *)cpm->rbase - DPRAM_BASE);

	if (cpm->version == 1) {
		out_8(&i2c_ram->tfcr, I2C_EB);
		out_8(&i2c_ram->rfcr, I2C_EB);
	} else {
		/* CPM2 additionally enables memory snooping. */
		out_8(&i2c_ram->tfcr, I2C_EB_CPM2);
		out_8(&i2c_ram->rfcr, I2C_EB_CPM2);
	}

	out_be16(&i2c_ram->mrblr, CPM_MAX_READ);

	/* Clear the controller's internal Rx/Tx bookkeeping. */
	out_be32(&i2c_ram->rstate, 0);
	out_be32(&i2c_ram->rdp, 0);
	out_be16(&i2c_ram->rbptr, 0);
	out_be16(&i2c_ram->rbc, 0);
	out_be32(&i2c_ram->rxtmp, 0);
	out_be32(&i2c_ram->tstate, 0);
	out_be32(&i2c_ram->tdp, 0);
	out_be16(&i2c_ram->tbptr, 0);
	out_be16(&i2c_ram->tbc, 0);
	out_be32(&i2c_ram->txtmp, 0);
}
177
178static void cpm_i2c_force_close(struct i2c_adapter *adap)
179{
180 struct cpm_i2c *cpm = i2c_get_adapdata(adap);
181 struct i2c_reg __iomem *i2c_reg = cpm->i2c_reg;
182
183 dev_dbg(&adap->dev, "cpm_i2c_force_close()\n");
184
185 cpm_command(cpm->cp_command, CPM_CR_CLOSE_RX_BD);
186
187 out_8(&i2c_reg->i2cmr, 0x00); /* Disable all interrupts */
188 out_8(&i2c_reg->i2cer, 0xff);
189}
190
191static void cpm_i2c_parse_message(struct i2c_adapter *adap,
192 struct i2c_msg *pmsg, int num, int tx, int rx)
193{
194 cbd_t __iomem *tbdf;
195 cbd_t __iomem *rbdf;
196 u_char addr;
197 u_char *tb;
198 u_char *rb;
199 struct cpm_i2c *cpm = i2c_get_adapdata(adap);
200
201 tbdf = cpm->tbase + tx;
202 rbdf = cpm->rbase + rx;
203
204 addr = pmsg->addr << 1;
205 if (pmsg->flags & I2C_M_RD)
206 addr |= 1;
207
208 tb = cpm->txbuf[tx];
209 rb = cpm->rxbuf[rx];
210
211 /* Align read buffer */
212 rb = (u_char *) (((ulong) rb + 1) & ~1);
213
214 tb[0] = addr; /* Device address byte w/rw flag */
215
216 out_be16(&tbdf->cbd_datlen, pmsg->len + 1);
217 out_be16(&tbdf->cbd_sc, 0);
218
219 if (!(pmsg->flags & I2C_M_NOSTART))
220 setbits16(&tbdf->cbd_sc, BD_I2C_START);
221
222 if (tx + 1 == num)
223 setbits16(&tbdf->cbd_sc, BD_SC_LAST | BD_SC_WRAP);
224
225 if (pmsg->flags & I2C_M_RD) {
226 /*
227 * To read, we need an empty buffer of the proper length.
228 * All that is used is the first byte for address, the remainder
229 * is just used for timing (and doesn't really have to exist).
230 */
231
232 dev_dbg(&adap->dev, "cpm_i2c_read(abyte=0x%x)\n", addr);
233
234 out_be16(&rbdf->cbd_datlen, 0);
235 out_be16(&rbdf->cbd_sc, BD_SC_EMPTY | BD_SC_INTRPT);
236
237 if (rx + 1 == CPM_MAXBD)
238 setbits16(&rbdf->cbd_sc, BD_SC_WRAP);
239
240 eieio();
241 setbits16(&tbdf->cbd_sc, BD_SC_READY);
242 } else {
243 dev_dbg(&adap->dev, "cpm_i2c_write(abyte=0x%x)\n", addr);
244
245 memcpy(tb+1, pmsg->buf, pmsg->len);
246
247 eieio();
248 setbits16(&tbdf->cbd_sc, BD_SC_READY | BD_SC_INTRPT);
249 }
250}
251
252static int cpm_i2c_check_message(struct i2c_adapter *adap,
253 struct i2c_msg *pmsg, int tx, int rx)
254{
255 cbd_t __iomem *tbdf;
256 cbd_t __iomem *rbdf;
257 u_char *tb;
258 u_char *rb;
259 struct cpm_i2c *cpm = i2c_get_adapdata(adap);
260
261 tbdf = cpm->tbase + tx;
262 rbdf = cpm->rbase + rx;
263
264 tb = cpm->txbuf[tx];
265 rb = cpm->rxbuf[rx];
266
267 /* Align read buffer */
268 rb = (u_char *) (((uint) rb + 1) & ~1);
269
270 eieio();
271 if (pmsg->flags & I2C_M_RD) {
272 dev_dbg(&adap->dev, "tx sc 0x%04x, rx sc 0x%04x\n",
273 in_be16(&tbdf->cbd_sc), in_be16(&rbdf->cbd_sc));
274
275 if (in_be16(&tbdf->cbd_sc) & BD_SC_NAK) {
276 dev_dbg(&adap->dev, "I2C read; No ack\n");
277 return -ENXIO;
278 }
279 if (in_be16(&rbdf->cbd_sc) & BD_SC_EMPTY) {
280 dev_err(&adap->dev,
281 "I2C read; complete but rbuf empty\n");
282 return -EREMOTEIO;
283 }
284 if (in_be16(&rbdf->cbd_sc) & BD_SC_OV) {
285 dev_err(&adap->dev, "I2C read; Overrun\n");
286 return -EREMOTEIO;
287 }
288 memcpy(pmsg->buf, rb, pmsg->len);
289 } else {
290 dev_dbg(&adap->dev, "tx sc %d 0x%04x\n", tx,
291 in_be16(&tbdf->cbd_sc));
292
293 if (in_be16(&tbdf->cbd_sc) & BD_SC_NAK) {
294 dev_dbg(&adap->dev, "I2C write; No ack\n");
295 return -ENXIO;
296 }
297 if (in_be16(&tbdf->cbd_sc) & BD_SC_UN) {
298 dev_err(&adap->dev, "I2C write; Underrun\n");
299 return -EIO;
300 }
301 if (in_be16(&tbdf->cbd_sc) & BD_SC_CL) {
302 dev_err(&adap->dev, "I2C write; Collision\n");
303 return -EIO;
304 }
305 }
306 return 0;
307}
308
309static int cpm_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
310{
311 struct cpm_i2c *cpm = i2c_get_adapdata(adap);
312 struct i2c_reg __iomem *i2c_reg = cpm->i2c_reg;
313 struct i2c_ram __iomem *i2c_ram = cpm->i2c_ram;
314 struct i2c_msg *pmsg;
315 int ret, i;
316 int tptr;
317 int rptr;
318 cbd_t __iomem *tbdf;
319 cbd_t __iomem *rbdf;
320
321 if (num > CPM_MAXBD)
322 return -EINVAL;
323
324 /* Check if we have any oversized READ requests */
325 for (i = 0; i < num; i++) {
326 pmsg = &msgs[i];
327 if (pmsg->len >= CPM_MAX_READ)
328 return -EINVAL;
329 }
330
331 /* Reset to use first buffer */
332 out_be16(&i2c_ram->rbptr, in_be16(&i2c_ram->rbase));
333 out_be16(&i2c_ram->tbptr, in_be16(&i2c_ram->tbase));
334
335 tbdf = cpm->tbase;
336 rbdf = cpm->rbase;
337
338 tptr = 0;
339 rptr = 0;
340
341 while (tptr < num) {
342 pmsg = &msgs[tptr];
343 dev_dbg(&adap->dev, "R: %d T: %d\n", rptr, tptr);
344
345 cpm_i2c_parse_message(adap, pmsg, num, tptr, rptr);
346 if (pmsg->flags & I2C_M_RD)
347 rptr++;
348 tptr++;
349 }
350 /* Start transfer now */
351 /* Enable RX/TX/Error interupts */
352 out_8(&i2c_reg->i2cmr, I2CER_TXE | I2CER_TXB | I2CER_RXB);
353 out_8(&i2c_reg->i2cer, 0xff); /* Clear interrupt status */
354 /* Chip bug, set enable here */
355 setbits8(&i2c_reg->i2mod, I2MOD_EN); /* Enable */
356 /* Begin transmission */
357 setbits8(&i2c_reg->i2com, I2COM_START);
358
359 tptr = 0;
360 rptr = 0;
361
362 while (tptr < num) {
363 /* Check for outstanding messages */
364 dev_dbg(&adap->dev, "test ready.\n");
365 pmsg = &msgs[tptr];
366 if (pmsg->flags & I2C_M_RD)
367 ret = wait_event_interruptible_timeout(cpm->i2c_wait,
368 !(in_be16(&rbdf[rptr].cbd_sc) & BD_SC_EMPTY),
369 1 * HZ);
370 else
371 ret = wait_event_interruptible_timeout(cpm->i2c_wait,
372 !(in_be16(&tbdf[tptr].cbd_sc) & BD_SC_READY),
373 1 * HZ);
374 if (ret == 0) {
375 ret = -EREMOTEIO;
376 dev_err(&adap->dev, "I2C transfer: timeout\n");
377 goto out_err;
378 }
379 if (ret > 0) {
380 dev_dbg(&adap->dev, "ready.\n");
381 ret = cpm_i2c_check_message(adap, pmsg, tptr, rptr);
382 tptr++;
383 if (pmsg->flags & I2C_M_RD)
384 rptr++;
385 if (ret)
386 goto out_err;
387 }
388 }
389#ifdef I2C_CHIP_ERRATA
390 /*
391 * Chip errata, clear enable. This is not needed on rev D4 CPUs.
392 * Disabling I2C too early may cause too short stop condition
393 */
394 udelay(4);
395 clrbits8(&i2c_reg->i2mod, I2MOD_EN);
396#endif
397 return (num);
398
399out_err:
400 cpm_i2c_force_close(adap);
401#ifdef I2C_CHIP_ERRATA
402 /*
403 * Chip errata, clear enable. This is not needed on rev D4 CPUs.
404 */
405 clrbits8(&i2c_reg->i2mod, I2MOD_EN);
406#endif
407 return ret;
408}
409
410static u32 cpm_i2c_func(struct i2c_adapter *adap)
411{
412 return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
413}
414
/* -----exported algorithm data: ------------------------------------- */

/* Transfer/functionality hooks handed to the i2c core. */
static const struct i2c_algorithm cpm_i2c_algo = {
	.master_xfer = cpm_i2c_xfer,
	.functionality = cpm_i2c_func,
};

/* Template adapter; copied into each cpm_i2c instance at probe time. */
static const struct i2c_adapter cpm_ops = {
	.owner		= THIS_MODULE,
	.name		= "i2c-cpm",
	.algo		= &cpm_i2c_algo,
	.class		= I2C_CLASS_HWMON | I2C_CLASS_SPD,
};
428
/*
 * One-time hardware/driver initialization: hook the interrupt, map the
 * parameter RAM (CPM1 vs CPM2 differ here) and the control registers,
 * read device-tree properties, allocate BD rings in the dual-port RAM
 * plus coherent DMA buffers, and program the baud rate generator.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * acquired so far is released via the goto cleanup chain.
 */
static int __devinit cpm_i2c_setup(struct cpm_i2c *cpm)
{
	struct of_device *ofdev = cpm->ofdev;
	const u32 *data;
	int len, ret, i;
	void __iomem *i2c_base;
	cbd_t __iomem *tbdf;
	cbd_t __iomem *rbdf;
	unsigned char brg;

	dev_dbg(&cpm->ofdev->dev, "cpm_i2c_setup()\n");

	init_waitqueue_head(&cpm->i2c_wait);

	cpm->irq = of_irq_to_resource(ofdev->node, 0, NULL);
	if (cpm->irq == NO_IRQ)
		return -EINVAL;

	/* Install interrupt handler. */
	ret = request_irq(cpm->irq, cpm_i2c_interrupt, 0, "cpm_i2c",
			  &cpm->adap);
	if (ret)
		return ret;

	/* I2C parameter RAM */
	i2c_base = of_iomap(ofdev->node, 1);
	if (i2c_base == NULL) {
		ret = -EINVAL;
		goto out_irq;
	}

	if (of_device_is_compatible(ofdev->node, "fsl,cpm1-i2c")) {

		/* Check for and use a microcode relocation patch. */
		cpm->i2c_ram = i2c_base;
		cpm->i2c_addr = in_be16(&cpm->i2c_ram->rpbase);

		/*
		 * Maybe should use cpm_muram_alloc instead of hardcoding
		 * this in micropatch.c
		 */
		if (cpm->i2c_addr) {
			/* Relocated: the real param RAM lives in muram. */
			cpm->i2c_ram = cpm_muram_addr(cpm->i2c_addr);
			iounmap(i2c_base);
		}

		cpm->version = 1;

	} else if (of_device_is_compatible(ofdev->node, "fsl,cpm2-i2c")) {
		/* CPM2: allocate param RAM from muram and point the
		 * controller's base register at it. */
		cpm->i2c_addr = cpm_muram_alloc(sizeof(struct i2c_ram), 64);
		cpm->i2c_ram = cpm_muram_addr(cpm->i2c_addr);
		out_be16(i2c_base, cpm->i2c_addr);
		iounmap(i2c_base);

		cpm->version = 2;

	} else {
		iounmap(i2c_base);
		ret = -EINVAL;
		goto out_irq;
	}

	/* I2C control/status registers */
	cpm->i2c_reg = of_iomap(ofdev->node, 0);
	if (cpm->i2c_reg == NULL) {
		ret = -EINVAL;
		goto out_ram;
	}

	/* CPM command opcode is mandatory. */
	data = of_get_property(ofdev->node, "fsl,cpm-command", &len);
	if (!data || len != 4) {
		ret = -EINVAL;
		goto out_reg;
	}
	cpm->cp_command = *data;

	/* Optional adapter class override. */
	data = of_get_property(ofdev->node, "linux,i2c-class", &len);
	if (data && len == 4)
		cpm->adap.class = *data;

	data = of_get_property(ofdev->node, "clock-frequency", &len);
	if (data && len == 4)
		cpm->freq = *data;
	else
		cpm->freq = 60000; /* use 60kHz i2c clock by default */

	/*
	 * Allocate space for CPM_MAXBD transmit and receive buffer
	 * descriptors in the DP ram.
	 */
	cpm->dp_addr = cpm_muram_alloc(sizeof(cbd_t) * 2 * CPM_MAXBD, 8);
	if (!cpm->dp_addr) {
		ret = -ENOMEM;
		goto out_reg;
	}

	cpm->tbase = cpm_muram_addr(cpm->dp_addr);
	cpm->rbase = cpm_muram_addr(cpm->dp_addr + sizeof(cbd_t) * CPM_MAXBD);

	/* Allocate TX and RX buffers */

	tbdf = cpm->tbase;
	rbdf = cpm->rbase;

	for (i = 0; i < CPM_MAXBD; i++) {
		cpm->rxbuf[i] = dma_alloc_coherent(
			NULL, CPM_MAX_READ + 1, &cpm->rxdma[i], GFP_KERNEL);
		if (!cpm->rxbuf[i]) {
			ret = -ENOMEM;
			goto out_muram;
		}
		/* Rx buffer address is rounded up to 2-byte alignment;
		 * cpm_i2c_check_message applies the same adjustment. */
		out_be32(&rbdf[i].cbd_bufaddr, ((cpm->rxdma[i] + 1) & ~1));

		cpm->txbuf[i] = (unsigned char *)dma_alloc_coherent(
			NULL, CPM_MAX_READ + 1, &cpm->txdma[i], GFP_KERNEL);
		if (!cpm->txbuf[i]) {
			ret = -ENOMEM;
			goto out_muram;
		}
		out_be32(&tbdf[i].cbd_bufaddr, cpm->txdma[i]);
	}

	/* Initialize Tx/Rx parameters. */

	cpm_reset_i2c_params(cpm);

	dev_dbg(&cpm->ofdev->dev, "i2c_ram 0x%p, i2c_addr 0x%04x, freq %d\n",
		cpm->i2c_ram, cpm->i2c_addr, cpm->freq);
	dev_dbg(&cpm->ofdev->dev, "tbase 0x%04x, rbase 0x%04x\n",
		(u8 __iomem *)cpm->tbase - DPRAM_BASE,
		(u8 __iomem *)cpm->rbase - DPRAM_BASE);

	cpm_command(cpm->cp_command, CPM_CR_INIT_TRX);

	/*
	 * Select an invalid address. Just make sure we don't use loopback mode
	 */
	out_8(&cpm->i2c_reg->i2add, 0x7f << 1);

	/*
	 * PDIV is set to 00 in i2mod, so brgclk/32 is used as input to the
	 * i2c baud rate generator. This is divided by 2 x (DIV + 3) to get
	 * the actual i2c bus frequency.
	 */
	brg = get_brgfreq() / (32 * 2 * cpm->freq) - 3;
	out_8(&cpm->i2c_reg->i2brg, brg);

	out_8(&cpm->i2c_reg->i2mod, 0x00);
	out_8(&cpm->i2c_reg->i2com, I2COM_MASTER);	/* Master mode */

	/* Disable interrupts. */
	out_8(&cpm->i2c_reg->i2cmr, 0);
	out_8(&cpm->i2c_reg->i2cer, 0xff);

	return 0;

out_muram:
	for (i = 0; i < CPM_MAXBD; i++) {
		if (cpm->rxbuf[i])
			dma_free_coherent(NULL, CPM_MAX_READ + 1,
				cpm->rxbuf[i], cpm->rxdma[i]);
		if (cpm->txbuf[i])
			dma_free_coherent(NULL, CPM_MAX_READ + 1,
				cpm->txbuf[i], cpm->txdma[i]);
	}
	cpm_muram_free(cpm->dp_addr);
out_reg:
	iounmap(cpm->i2c_reg);
out_ram:
	/* Only unmap CPM1 param RAM if no relocation patch was active;
	 * CPM2 allocated it from muram instead. */
	if ((cpm->version == 1) && (!cpm->i2c_addr))
		iounmap(cpm->i2c_ram);
	if (cpm->version == 2)
		cpm_muram_free(cpm->i2c_addr);
out_irq:
	free_irq(cpm->irq, &cpm->adap);
	return ret;
}
606
/*
 * Undo cpm_i2c_setup(): stop the controller, mask and acknowledge
 * interrupts, release the IRQ, free DMA buffers and muram, and unmap
 * the register window.
 */
static void cpm_i2c_shutdown(struct cpm_i2c *cpm)
{
	int i;

	/* Shut down I2C. */
	clrbits8(&cpm->i2c_reg->i2mod, I2MOD_EN);

	/* Disable interrupts */
	out_8(&cpm->i2c_reg->i2cmr, 0);
	out_8(&cpm->i2c_reg->i2cer, 0xff);

	free_irq(cpm->irq, &cpm->adap);

	/* Free all memory */
	for (i = 0; i < CPM_MAXBD; i++) {
		dma_free_coherent(NULL, CPM_MAX_READ + 1,
			cpm->rxbuf[i], cpm->rxdma[i]);
		dma_free_coherent(NULL, CPM_MAX_READ + 1,
			cpm->txbuf[i], cpm->txdma[i]);
	}

	cpm_muram_free(cpm->dp_addr);
	iounmap(cpm->i2c_reg);

	/* Only unmap CPM1 param RAM if it was ours (no microcode
	 * relocation); CPM2 allocated it from muram instead. */
	if ((cpm->version == 1) && (!cpm->i2c_addr))
		iounmap(cpm->i2c_ram);
	if (cpm->version == 2)
		cpm_muram_free(cpm->i2c_addr);
}
636
/*
 * Probe: allocate per-adapter state, initialize the hardware via
 * cpm_i2c_setup(), register the adapter (numbered if the device tree
 * provides "linux,i2c-index"), then instantiate child I2C devices
 * declared in the device tree.
 */
static int __devinit cpm_i2c_probe(struct of_device *ofdev,
			    const struct of_device_id *match)
{
	int result, len;
	struct cpm_i2c *cpm;
	const u32 *data;

	cpm = kzalloc(sizeof(struct cpm_i2c), GFP_KERNEL);
	if (!cpm)
		return -ENOMEM;

	cpm->ofdev = ofdev;

	dev_set_drvdata(&ofdev->dev, cpm);

	/* Start from the template adapter and bind it to this device. */
	cpm->adap = cpm_ops;
	i2c_set_adapdata(&cpm->adap, cpm);
	cpm->adap.dev.parent = &ofdev->dev;

	result = cpm_i2c_setup(cpm);
	if (result) {
		dev_err(&ofdev->dev, "Unable to init hardware\n");
		goto out_free;
	}

	/* register new adapter to i2c module... */

	data = of_get_property(ofdev->node, "linux,i2c-index", &len);
	if (data && len == 4) {
		cpm->adap.nr = *data;
		result = i2c_add_numbered_adapter(&cpm->adap);
	} else
		result = i2c_add_adapter(&cpm->adap);

	if (result < 0) {
		dev_err(&ofdev->dev, "Unable to register with I2C\n");
		goto out_shut;
	}

	dev_dbg(&ofdev->dev, "hw routines for %s registered.\n",
		cpm->adap.name);

	/*
	 * register OF I2C devices
	 */
	of_register_i2c_devices(&cpm->adap, ofdev->node);

	return 0;
out_shut:
	cpm_i2c_shutdown(cpm);
out_free:
	dev_set_drvdata(&ofdev->dev, NULL);
	kfree(cpm);

	return result;
}
693
694static int __devexit cpm_i2c_remove(struct of_device *ofdev)
695{
696 struct cpm_i2c *cpm = dev_get_drvdata(&ofdev->dev);
697
698 i2c_del_adapter(&cpm->adap);
699
700 cpm_i2c_shutdown(cpm);
701
702 dev_set_drvdata(&ofdev->dev, NULL);
703 kfree(cpm);
704
705 return 0;
706}
707
/* Bind to both CPM1 and CPM2 flavours of the controller. */
static const struct of_device_id cpm_i2c_match[] = {
	{
		.compatible = "fsl,cpm1-i2c",
	},
	{
		.compatible = "fsl,cpm2-i2c",
	},
	{},
};

MODULE_DEVICE_TABLE(of, cpm_i2c_match);

static struct of_platform_driver cpm_i2c_driver = {
	.match_table	= cpm_i2c_match,
	.probe		= cpm_i2c_probe,
	.remove		= __devexit_p(cpm_i2c_remove),
	.driver		= {
		.name	= "fsl-i2c-cpm",
		.owner	= THIS_MODULE,
	}
};
729
/* Module entry point: register the of_platform driver. */
static int __init cpm_i2c_init(void)
{
	return of_register_platform_driver(&cpm_i2c_driver);
}

/* Module exit point: unregister the of_platform driver. */
static void __exit cpm_i2c_exit(void)
{
	of_unregister_platform_driver(&cpm_i2c_driver);
}

module_init(cpm_i2c_init);
module_exit(cpm_i2c_exit);

MODULE_AUTHOR("Jochen Friedrich <jochen@scram.de>");
MODULE_DESCRIPTION("I2C-Bus adapter routines for CPM boards");
MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c
index 7ecbfc429b19..af3846eda985 100644
--- a/drivers/i2c/busses/i2c-davinci.c
+++ b/drivers/i2c/busses/i2c-davinci.c
@@ -85,6 +85,7 @@
85#define DAVINCI_I2C_MDR_MST (1 << 10) 85#define DAVINCI_I2C_MDR_MST (1 << 10)
86#define DAVINCI_I2C_MDR_TRX (1 << 9) 86#define DAVINCI_I2C_MDR_TRX (1 << 9)
87#define DAVINCI_I2C_MDR_XA (1 << 8) 87#define DAVINCI_I2C_MDR_XA (1 << 8)
88#define DAVINCI_I2C_MDR_RM (1 << 7)
88#define DAVINCI_I2C_MDR_IRS (1 << 5) 89#define DAVINCI_I2C_MDR_IRS (1 << 5)
89 90
90#define DAVINCI_I2C_IMR_AAS (1 << 6) 91#define DAVINCI_I2C_IMR_AAS (1 << 6)
@@ -112,6 +113,7 @@ struct davinci_i2c_dev {
112 u8 *buf; 113 u8 *buf;
113 size_t buf_len; 114 size_t buf_len;
114 int irq; 115 int irq;
116 u8 terminate;
115 struct i2c_adapter adapter; 117 struct i2c_adapter adapter;
116}; 118};
117 119
@@ -142,6 +144,7 @@ static int i2c_davinci_init(struct davinci_i2c_dev *dev)
142 struct davinci_i2c_platform_data *pdata = dev->dev->platform_data; 144 struct davinci_i2c_platform_data *pdata = dev->dev->platform_data;
143 u16 psc; 145 u16 psc;
144 u32 clk; 146 u32 clk;
147 u32 d;
145 u32 clkh; 148 u32 clkh;
146 u32 clkl; 149 u32 clkl;
147 u32 input_clock = clk_get_rate(dev->clk); 150 u32 input_clock = clk_get_rate(dev->clk);
@@ -171,23 +174,29 @@ static int i2c_davinci_init(struct davinci_i2c_dev *dev)
171 * if PSC > 1 , d = 5 174 * if PSC > 1 , d = 5
172 */ 175 */
173 176
174 psc = 26; /* To get 1MHz clock */ 177 /* get minimum of 7 MHz clock, but max of 12 MHz */
178 psc = (input_clock / 7000000) - 1;
179 if ((input_clock / (psc + 1)) > 12000000)
180 psc++; /* better to run under spec than over */
181 d = (psc >= 2) ? 5 : 7 - psc;
175 182
176 clk = ((input_clock / (psc + 1)) / (pdata->bus_freq * 1000)) - 10; 183 clk = ((input_clock / (psc + 1)) / (pdata->bus_freq * 1000)) - (d << 1);
177 clkh = (50 * clk) / 100; 184 clkh = clk >> 1;
178 clkl = clk - clkh; 185 clkl = clk - clkh;
179 186
180 davinci_i2c_write_reg(dev, DAVINCI_I2C_PSC_REG, psc); 187 davinci_i2c_write_reg(dev, DAVINCI_I2C_PSC_REG, psc);
181 davinci_i2c_write_reg(dev, DAVINCI_I2C_CLKH_REG, clkh); 188 davinci_i2c_write_reg(dev, DAVINCI_I2C_CLKH_REG, clkh);
182 davinci_i2c_write_reg(dev, DAVINCI_I2C_CLKL_REG, clkl); 189 davinci_i2c_write_reg(dev, DAVINCI_I2C_CLKL_REG, clkl);
183 190
184 dev_dbg(dev->dev, "CLK = %d\n", clk); 191 dev_dbg(dev->dev, "input_clock = %d, CLK = %d\n", input_clock, clk);
185 dev_dbg(dev->dev, "PSC = %d\n", 192 dev_dbg(dev->dev, "PSC = %d\n",
186 davinci_i2c_read_reg(dev, DAVINCI_I2C_PSC_REG)); 193 davinci_i2c_read_reg(dev, DAVINCI_I2C_PSC_REG));
187 dev_dbg(dev->dev, "CLKL = %d\n", 194 dev_dbg(dev->dev, "CLKL = %d\n",
188 davinci_i2c_read_reg(dev, DAVINCI_I2C_CLKL_REG)); 195 davinci_i2c_read_reg(dev, DAVINCI_I2C_CLKL_REG));
189 dev_dbg(dev->dev, "CLKH = %d\n", 196 dev_dbg(dev->dev, "CLKH = %d\n",
190 davinci_i2c_read_reg(dev, DAVINCI_I2C_CLKH_REG)); 197 davinci_i2c_read_reg(dev, DAVINCI_I2C_CLKH_REG));
198 dev_dbg(dev->dev, "bus_freq = %dkHz, bus_delay = %d\n",
199 pdata->bus_freq, pdata->bus_delay);
191 200
192 /* Take the I2C module out of reset: */ 201 /* Take the I2C module out of reset: */
193 w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG); 202 w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG);
@@ -233,7 +242,6 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop)
233 struct davinci_i2c_dev *dev = i2c_get_adapdata(adap); 242 struct davinci_i2c_dev *dev = i2c_get_adapdata(adap);
234 struct davinci_i2c_platform_data *pdata = dev->dev->platform_data; 243 struct davinci_i2c_platform_data *pdata = dev->dev->platform_data;
235 u32 flag; 244 u32 flag;
236 u32 stat;
237 u16 w; 245 u16 w;
238 int r; 246 int r;
239 247
@@ -254,12 +262,9 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop)
254 262
255 davinci_i2c_write_reg(dev, DAVINCI_I2C_CNT_REG, dev->buf_len); 263 davinci_i2c_write_reg(dev, DAVINCI_I2C_CNT_REG, dev->buf_len);
256 264
257 init_completion(&dev->cmd_complete); 265 INIT_COMPLETION(dev->cmd_complete);
258 dev->cmd_err = 0; 266 dev->cmd_err = 0;
259 267
260 /* Clear any pending interrupts by reading the IVR */
261 stat = davinci_i2c_read_reg(dev, DAVINCI_I2C_IVR_REG);
262
263 /* Take I2C out of reset, configure it as master and set the 268 /* Take I2C out of reset, configure it as master and set the
264 * start bit */ 269 * start bit */
265 flag = DAVINCI_I2C_MDR_IRS | DAVINCI_I2C_MDR_MST | DAVINCI_I2C_MDR_STT; 270 flag = DAVINCI_I2C_MDR_IRS | DAVINCI_I2C_MDR_MST | DAVINCI_I2C_MDR_STT;
@@ -280,20 +285,34 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop)
280 MOD_REG_BIT(w, DAVINCI_I2C_IMR_XRDY, 1); 285 MOD_REG_BIT(w, DAVINCI_I2C_IMR_XRDY, 1);
281 davinci_i2c_write_reg(dev, DAVINCI_I2C_IMR_REG, w); 286 davinci_i2c_write_reg(dev, DAVINCI_I2C_IMR_REG, w);
282 287
288 dev->terminate = 0;
283 /* write the data into mode register */ 289 /* write the data into mode register */
284 davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag); 290 davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag);
285 291
286 r = wait_for_completion_interruptible_timeout(&dev->cmd_complete, 292 r = wait_for_completion_interruptible_timeout(&dev->cmd_complete,
287 DAVINCI_I2C_TIMEOUT); 293 DAVINCI_I2C_TIMEOUT);
288 dev->buf_len = 0;
289 if (r < 0)
290 return r;
291
292 if (r == 0) { 294 if (r == 0) {
293 dev_err(dev->dev, "controller timed out\n"); 295 dev_err(dev->dev, "controller timed out\n");
294 i2c_davinci_init(dev); 296 i2c_davinci_init(dev);
297 dev->buf_len = 0;
295 return -ETIMEDOUT; 298 return -ETIMEDOUT;
296 } 299 }
300 if (dev->buf_len) {
301 /* This should be 0 if all bytes were transferred
302 * or dev->cmd_err denotes an error.
303 * A signal may have aborted the transfer.
304 */
305 if (r >= 0) {
306 dev_err(dev->dev, "abnormal termination buf_len=%i\n",
307 dev->buf_len);
308 r = -EREMOTEIO;
309 }
310 dev->terminate = 1;
311 wmb();
312 dev->buf_len = 0;
313 }
314 if (r < 0)
315 return r;
297 316
298 /* no error */ 317 /* no error */
299 if (likely(!dev->cmd_err)) 318 if (likely(!dev->cmd_err))
@@ -338,12 +357,11 @@ i2c_davinci_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
338 357
339 for (i = 0; i < num; i++) { 358 for (i = 0; i < num; i++) {
340 ret = i2c_davinci_xfer_msg(adap, &msgs[i], (i == (num - 1))); 359 ret = i2c_davinci_xfer_msg(adap, &msgs[i], (i == (num - 1)));
360 dev_dbg(dev->dev, "%s [%d/%d] ret: %d\n", __func__, i + 1, num,
361 ret);
341 if (ret < 0) 362 if (ret < 0)
342 return ret; 363 return ret;
343 } 364 }
344
345 dev_dbg(dev->dev, "%s:%d ret: %d\n", __func__, __LINE__, ret);
346
347 return num; 365 return num;
348} 366}
349 367
@@ -352,6 +370,27 @@ static u32 i2c_davinci_func(struct i2c_adapter *adap)
352 return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); 370 return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
353} 371}
354 372
373static void terminate_read(struct davinci_i2c_dev *dev)
374{
375 u16 w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG);
376 w |= DAVINCI_I2C_MDR_NACK;
377 davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, w);
378
379 /* Throw away data */
380 davinci_i2c_read_reg(dev, DAVINCI_I2C_DRR_REG);
381 if (!dev->terminate)
382 dev_err(dev->dev, "RDR IRQ while no data requested\n");
383}
384static void terminate_write(struct davinci_i2c_dev *dev)
385{
386 u16 w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG);
387 w |= DAVINCI_I2C_MDR_RM | DAVINCI_I2C_MDR_STP;
388 davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, w);
389
390 if (!dev->terminate)
391 dev_err(dev->dev, "TDR IRQ while no data to send\n");
392}
393
355/* 394/*
356 * Interrupt service routine. This gets called whenever an I2C interrupt 395 * Interrupt service routine. This gets called whenever an I2C interrupt
357 * occurs. 396 * occurs.
@@ -372,12 +411,15 @@ static irqreturn_t i2c_davinci_isr(int this_irq, void *dev_id)
372 411
373 switch (stat) { 412 switch (stat) {
374 case DAVINCI_I2C_IVR_AL: 413 case DAVINCI_I2C_IVR_AL:
414 /* Arbitration lost, must retry */
375 dev->cmd_err |= DAVINCI_I2C_STR_AL; 415 dev->cmd_err |= DAVINCI_I2C_STR_AL;
416 dev->buf_len = 0;
376 complete(&dev->cmd_complete); 417 complete(&dev->cmd_complete);
377 break; 418 break;
378 419
379 case DAVINCI_I2C_IVR_NACK: 420 case DAVINCI_I2C_IVR_NACK:
380 dev->cmd_err |= DAVINCI_I2C_STR_NACK; 421 dev->cmd_err |= DAVINCI_I2C_STR_NACK;
422 dev->buf_len = 0;
381 complete(&dev->cmd_complete); 423 complete(&dev->cmd_complete);
382 break; 424 break;
383 425
@@ -399,9 +441,10 @@ static irqreturn_t i2c_davinci_isr(int this_irq, void *dev_id)
399 davinci_i2c_write_reg(dev, 441 davinci_i2c_write_reg(dev,
400 DAVINCI_I2C_STR_REG, 442 DAVINCI_I2C_STR_REG,
401 DAVINCI_I2C_IMR_RRDY); 443 DAVINCI_I2C_IMR_RRDY);
402 } else 444 } else {
403 dev_err(dev->dev, "RDR IRQ while no " 445 /* signal can terminate transfer */
404 "data requested\n"); 446 terminate_read(dev);
447 }
405 break; 448 break;
406 449
407 case DAVINCI_I2C_IVR_XRDY: 450 case DAVINCI_I2C_IVR_XRDY:
@@ -418,9 +461,10 @@ static irqreturn_t i2c_davinci_isr(int this_irq, void *dev_id)
418 davinci_i2c_write_reg(dev, 461 davinci_i2c_write_reg(dev,
419 DAVINCI_I2C_IMR_REG, 462 DAVINCI_I2C_IMR_REG,
420 w); 463 w);
421 } else 464 } else {
422 dev_err(dev->dev, "TDR IRQ while no data to " 465 /* signal can terminate transfer */
423 "send\n"); 466 terminate_write(dev);
467 }
424 break; 468 break;
425 469
426 case DAVINCI_I2C_IVR_SCD: 470 case DAVINCI_I2C_IVR_SCD:
@@ -475,6 +519,7 @@ static int davinci_i2c_probe(struct platform_device *pdev)
475 goto err_release_region; 519 goto err_release_region;
476 } 520 }
477 521
522 init_completion(&dev->cmd_complete);
478 dev->dev = get_device(&pdev->dev); 523 dev->dev = get_device(&pdev->dev);
479 dev->irq = irq->start; 524 dev->irq = irq->start;
480 platform_set_drvdata(pdev, dev); 525 platform_set_drvdata(pdev, dev);
diff --git a/drivers/i2c/busses/i2c-elektor.c b/drivers/i2c/busses/i2c-elektor.c
index b7a9977b025f..7f38c01fb3a0 100644
--- a/drivers/i2c/busses/i2c-elektor.c
+++ b/drivers/i2c/busses/i2c-elektor.c
@@ -196,13 +196,11 @@ static struct i2c_algo_pcf_data pcf_isa_data = {
196 .getown = pcf_isa_getown, 196 .getown = pcf_isa_getown,
197 .getclock = pcf_isa_getclock, 197 .getclock = pcf_isa_getclock,
198 .waitforpin = pcf_isa_waitforpin, 198 .waitforpin = pcf_isa_waitforpin,
199 .udelay = 10,
200 .timeout = 100,
201}; 199};
202 200
203static struct i2c_adapter pcf_isa_ops = { 201static struct i2c_adapter pcf_isa_ops = {
204 .owner = THIS_MODULE, 202 .owner = THIS_MODULE,
205 .class = I2C_CLASS_HWMON, 203 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
206 .id = I2C_HW_P_ELEK, 204 .id = I2C_HW_P_ELEK,
207 .algo_data = &pcf_isa_data, 205 .algo_data = &pcf_isa_data,
208 .name = "i2c-elektor", 206 .name = "i2c-elektor",
diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c
index 7c1b762aa681..79b455a1f090 100644
--- a/drivers/i2c/busses/i2c-gpio.c
+++ b/drivers/i2c/busses/i2c-gpio.c
@@ -140,7 +140,7 @@ static int __init i2c_gpio_probe(struct platform_device *pdev)
140 adap->owner = THIS_MODULE; 140 adap->owner = THIS_MODULE;
141 snprintf(adap->name, sizeof(adap->name), "i2c-gpio%d", pdev->id); 141 snprintf(adap->name, sizeof(adap->name), "i2c-gpio%d", pdev->id);
142 adap->algo_data = bit_data; 142 adap->algo_data = bit_data;
143 adap->class = I2C_CLASS_HWMON; 143 adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
144 adap->dev.parent = &pdev->dev; 144 adap->dev.parent = &pdev->dev;
145 145
146 /* 146 /*
diff --git a/drivers/i2c/busses/i2c-hydra.c b/drivers/i2c/busses/i2c-hydra.c
index f9972f9651e4..1098f21ace13 100644
--- a/drivers/i2c/busses/i2c-hydra.c
+++ b/drivers/i2c/busses/i2c-hydra.c
@@ -1,7 +1,4 @@
1/* 1/*
2 i2c-hydra.c - Part of lm_sensors, Linux kernel modules
3 for hardware monitoring
4
5 i2c Support for the Apple `Hydra' Mac I/O 2 i2c Support for the Apple `Hydra' Mac I/O
6 3
7 Copyright (c) 1999-2004 Geert Uytterhoeven <geert@linux-m68k.org> 4 Copyright (c) 1999-2004 Geert Uytterhoeven <geert@linux-m68k.org>
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index b0f771fe4326..dc7ea32b69a8 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1,10 +1,8 @@
1/* 1/*
2 i2c-i801.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (c) 1998 - 2002 Frodo Looijaard <frodol@dds.nl>, 2 Copyright (c) 1998 - 2002 Frodo Looijaard <frodol@dds.nl>,
5 Philip Edelbrock <phil@netroedge.com>, and Mark D. Studebaker 3 Philip Edelbrock <phil@netroedge.com>, and Mark D. Studebaker
6 <mdsxyz123@yahoo.com> 4 <mdsxyz123@yahoo.com>
7 Copyright (C) 2007 Jean Delvare <khali@linux-fr.org> 5 Copyright (C) 2007, 2008 Jean Delvare <khali@linux-fr.org>
8 6
9 This program is free software; you can redistribute it and/or modify 7 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by 8 it under the terms of the GNU General Public License as published by
@@ -64,6 +62,7 @@
64#include <linux/ioport.h> 62#include <linux/ioport.h>
65#include <linux/init.h> 63#include <linux/init.h>
66#include <linux/i2c.h> 64#include <linux/i2c.h>
65#include <linux/acpi.h>
67#include <asm/io.h> 66#include <asm/io.h>
68 67
69/* I801 SMBus address offsets */ 68/* I801 SMBus address offsets */
@@ -121,6 +120,10 @@
121#define SMBHSTSTS_INTR 0x02 120#define SMBHSTSTS_INTR 0x02
122#define SMBHSTSTS_HOST_BUSY 0x01 121#define SMBHSTSTS_HOST_BUSY 0x01
123 122
123#define STATUS_FLAGS (SMBHSTSTS_BYTE_DONE | SMBHSTSTS_FAILED | \
124 SMBHSTSTS_BUS_ERR | SMBHSTSTS_DEV_ERR | \
125 SMBHSTSTS_INTR)
126
124static unsigned long i801_smba; 127static unsigned long i801_smba;
125static unsigned char i801_original_hstcfg; 128static unsigned char i801_original_hstcfg;
126static struct pci_driver i801_driver; 129static struct pci_driver i801_driver;
@@ -132,105 +135,137 @@ static struct pci_dev *I801_dev;
132#define FEATURE_I2C_BLOCK_READ (1 << 3) 135#define FEATURE_I2C_BLOCK_READ (1 << 3)
133static unsigned int i801_features; 136static unsigned int i801_features;
134 137
135static int i801_transaction(int xact) 138/* Make sure the SMBus host is ready to start transmitting.
139 Return 0 if it is, -EBUSY if it is not. */
140static int i801_check_pre(void)
136{ 141{
137 int temp; 142 int status;
138 int result = 0;
139 int timeout = 0;
140 143
141 dev_dbg(&I801_dev->dev, "Transaction (pre): CNT=%02x, CMD=%02x, " 144 status = inb_p(SMBHSTSTS);
142 "ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb_p(SMBHSTCNT), 145 if (status & SMBHSTSTS_HOST_BUSY) {
143 inb_p(SMBHSTCMD), inb_p(SMBHSTADD), inb_p(SMBHSTDAT0), 146 dev_err(&I801_dev->dev, "SMBus is busy, can't use it!\n");
144 inb_p(SMBHSTDAT1)); 147 return -EBUSY;
145 148 }
146 /* Make sure the SMBus host is ready to start transmitting */ 149
147 /* 0x1f = Failed, Bus_Err, Dev_Err, Intr, Host_Busy */ 150 status &= STATUS_FLAGS;
148 if ((temp = (0x1f & inb_p(SMBHSTSTS))) != 0x00) { 151 if (status) {
149 dev_dbg(&I801_dev->dev, "SMBus busy (%02x). Resetting...\n", 152 dev_dbg(&I801_dev->dev, "Clearing status flags (%02x)\n",
150 temp); 153 status);
151 outb_p(temp, SMBHSTSTS); 154 outb_p(status, SMBHSTSTS);
152 if ((temp = (0x1f & inb_p(SMBHSTSTS))) != 0x00) { 155 status = inb_p(SMBHSTSTS) & STATUS_FLAGS;
153 dev_dbg(&I801_dev->dev, "Failed! (%02x)\n", temp); 156 if (status) {
154 return -1; 157 dev_err(&I801_dev->dev,
155 } else { 158 "Failed clearing status flags (%02x)\n",
156 dev_dbg(&I801_dev->dev, "Successful!\n"); 159 status);
160 return -EBUSY;
157 } 161 }
158 } 162 }
159 163
160 /* the current contents of SMBHSTCNT can be overwritten, since PEC, 164 return 0;
161 * INTREN, SMBSCMD are passed in xact */ 165}
162 outb_p(xact | I801_START, SMBHSTCNT);
163 166
164 /* We will always wait for a fraction of a second! */ 167/* Convert the status register to an error code, and clear it. */
165 do { 168static int i801_check_post(int status, int timeout)
166 msleep(1); 169{
167 temp = inb_p(SMBHSTSTS); 170 int result = 0;
168 } while ((temp & SMBHSTSTS_HOST_BUSY) && (timeout++ < MAX_TIMEOUT));
169 171
170 /* If the SMBus is still busy, we give up */ 172 /* If the SMBus is still busy, we give up */
171 if (timeout >= MAX_TIMEOUT) { 173 if (timeout) {
172 dev_dbg(&I801_dev->dev, "SMBus Timeout!\n"); 174 dev_err(&I801_dev->dev, "Transaction timeout\n");
173 result = -1;
174 /* try to stop the current command */ 175 /* try to stop the current command */
175 dev_dbg(&I801_dev->dev, "Terminating the current operation\n"); 176 dev_dbg(&I801_dev->dev, "Terminating the current operation\n");
176 outb_p(inb_p(SMBHSTCNT) | SMBHSTCNT_KILL, SMBHSTCNT); 177 outb_p(inb_p(SMBHSTCNT) | SMBHSTCNT_KILL, SMBHSTCNT);
177 msleep(1); 178 msleep(1);
178 outb_p(inb_p(SMBHSTCNT) & (~SMBHSTCNT_KILL), SMBHSTCNT); 179 outb_p(inb_p(SMBHSTCNT) & (~SMBHSTCNT_KILL), SMBHSTCNT);
179 }
180 180
181 if (temp & SMBHSTSTS_FAILED) { 181 /* Check if it worked */
182 result = -1; 182 status = inb_p(SMBHSTSTS);
183 dev_dbg(&I801_dev->dev, "Error: Failed bus transaction\n"); 183 if ((status & SMBHSTSTS_HOST_BUSY) ||
184 !(status & SMBHSTSTS_FAILED))
185 dev_err(&I801_dev->dev,
186 "Failed terminating the transaction\n");
187 outb_p(STATUS_FLAGS, SMBHSTSTS);
188 return -ETIMEDOUT;
184 } 189 }
185 190
186 if (temp & SMBHSTSTS_BUS_ERR) { 191 if (status & SMBHSTSTS_FAILED) {
187 result = -1; 192 result = -EIO;
188 dev_err(&I801_dev->dev, "Bus collision! SMBus may be locked " 193 dev_err(&I801_dev->dev, "Transaction failed\n");
189 "until next hard reset. (sorry!)\n");
190 /* Clock stops and slave is stuck in mid-transmission */
191 } 194 }
192 195 if (status & SMBHSTSTS_DEV_ERR) {
193 if (temp & SMBHSTSTS_DEV_ERR) { 196 result = -ENXIO;
194 result = -1; 197 dev_dbg(&I801_dev->dev, "No response\n");
195 dev_dbg(&I801_dev->dev, "Error: no response!\n"); 198 }
199 if (status & SMBHSTSTS_BUS_ERR) {
200 result = -EAGAIN;
201 dev_dbg(&I801_dev->dev, "Lost arbitration\n");
196 } 202 }
197 203
198 if ((inb_p(SMBHSTSTS) & 0x1f) != 0x00) 204 if (result) {
199 outb_p(inb(SMBHSTSTS), SMBHSTSTS); 205 /* Clear error flags */
200 206 outb_p(status & STATUS_FLAGS, SMBHSTSTS);
201 if ((temp = (0x1f & inb_p(SMBHSTSTS))) != 0x00) { 207 status = inb_p(SMBHSTSTS) & STATUS_FLAGS;
202 dev_dbg(&I801_dev->dev, "Failed reset at end of transaction " 208 if (status) {
203 "(%02x)\n", temp); 209 dev_warn(&I801_dev->dev, "Failed clearing status "
210 "flags at end of transaction (%02x)\n",
211 status);
212 }
204 } 213 }
205 dev_dbg(&I801_dev->dev, "Transaction (post): CNT=%02x, CMD=%02x, " 214
206 "ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb_p(SMBHSTCNT),
207 inb_p(SMBHSTCMD), inb_p(SMBHSTADD), inb_p(SMBHSTDAT0),
208 inb_p(SMBHSTDAT1));
209 return result; 215 return result;
210} 216}
211 217
218static int i801_transaction(int xact)
219{
220 int status;
221 int result;
222 int timeout = 0;
223
224 result = i801_check_pre();
225 if (result < 0)
226 return result;
227
228 /* the current contents of SMBHSTCNT can be overwritten, since PEC,
229 * INTREN, SMBSCMD are passed in xact */
230 outb_p(xact | I801_START, SMBHSTCNT);
231
232 /* We will always wait for a fraction of a second! */
233 do {
234 msleep(1);
235 status = inb_p(SMBHSTSTS);
236 } while ((status & SMBHSTSTS_HOST_BUSY) && (timeout++ < MAX_TIMEOUT));
237
238 result = i801_check_post(status, timeout >= MAX_TIMEOUT);
239 if (result < 0)
240 return result;
241
242 outb_p(SMBHSTSTS_INTR, SMBHSTSTS);
243 return 0;
244}
245
212/* wait for INTR bit as advised by Intel */ 246/* wait for INTR bit as advised by Intel */
213static void i801_wait_hwpec(void) 247static void i801_wait_hwpec(void)
214{ 248{
215 int timeout = 0; 249 int timeout = 0;
216 int temp; 250 int status;
217 251
218 do { 252 do {
219 msleep(1); 253 msleep(1);
220 temp = inb_p(SMBHSTSTS); 254 status = inb_p(SMBHSTSTS);
221 } while ((!(temp & SMBHSTSTS_INTR)) 255 } while ((!(status & SMBHSTSTS_INTR))
222 && (timeout++ < MAX_TIMEOUT)); 256 && (timeout++ < MAX_TIMEOUT));
223 257
224 if (timeout >= MAX_TIMEOUT) { 258 if (timeout >= MAX_TIMEOUT) {
225 dev_dbg(&I801_dev->dev, "PEC Timeout!\n"); 259 dev_dbg(&I801_dev->dev, "PEC Timeout!\n");
226 } 260 }
227 outb_p(temp, SMBHSTSTS); 261 outb_p(status, SMBHSTSTS);
228} 262}
229 263
230static int i801_block_transaction_by_block(union i2c_smbus_data *data, 264static int i801_block_transaction_by_block(union i2c_smbus_data *data,
231 char read_write, int hwpec) 265 char read_write, int hwpec)
232{ 266{
233 int i, len; 267 int i, len;
268 int status;
234 269
235 inb_p(SMBHSTCNT); /* reset the data buffer index */ 270 inb_p(SMBHSTCNT); /* reset the data buffer index */
236 271
@@ -242,14 +277,15 @@ static int i801_block_transaction_by_block(union i2c_smbus_data *data,
242 outb_p(data->block[i+1], SMBBLKDAT); 277 outb_p(data->block[i+1], SMBBLKDAT);
243 } 278 }
244 279
245 if (i801_transaction(I801_BLOCK_DATA | ENABLE_INT9 | 280 status = i801_transaction(I801_BLOCK_DATA | ENABLE_INT9 |
246 I801_PEC_EN * hwpec)) 281 I801_PEC_EN * hwpec);
247 return -1; 282 if (status)
283 return status;
248 284
249 if (read_write == I2C_SMBUS_READ) { 285 if (read_write == I2C_SMBUS_READ) {
250 len = inb_p(SMBHSTDAT0); 286 len = inb_p(SMBHSTDAT0);
251 if (len < 1 || len > I2C_SMBUS_BLOCK_MAX) 287 if (len < 1 || len > I2C_SMBUS_BLOCK_MAX)
252 return -1; 288 return -EPROTO;
253 289
254 data->block[0] = len; 290 data->block[0] = len;
255 for (i = 0; i < len; i++) 291 for (i = 0; i < len; i++)
@@ -264,10 +300,13 @@ static int i801_block_transaction_byte_by_byte(union i2c_smbus_data *data,
264{ 300{
265 int i, len; 301 int i, len;
266 int smbcmd; 302 int smbcmd;
267 int temp; 303 int status;
268 int result = 0; 304 int result;
269 int timeout; 305 int timeout;
270 unsigned char errmask; 306
307 result = i801_check_pre();
308 if (result < 0)
309 return result;
271 310
272 len = data->block[0]; 311 len = data->block[0];
273 312
@@ -291,36 +330,6 @@ static int i801_block_transaction_byte_by_byte(union i2c_smbus_data *data,
291 } 330 }
292 outb_p(smbcmd | ENABLE_INT9, SMBHSTCNT); 331 outb_p(smbcmd | ENABLE_INT9, SMBHSTCNT);
293 332
294 dev_dbg(&I801_dev->dev, "Block (pre %d): CNT=%02x, CMD=%02x, "
295 "ADD=%02x, DAT0=%02x, DAT1=%02x, BLKDAT=%02x\n", i,
296 inb_p(SMBHSTCNT), inb_p(SMBHSTCMD), inb_p(SMBHSTADD),
297 inb_p(SMBHSTDAT0), inb_p(SMBHSTDAT1), inb_p(SMBBLKDAT));
298
299 /* Make sure the SMBus host is ready to start transmitting */
300 temp = inb_p(SMBHSTSTS);
301 if (i == 1) {
302 /* Erroneous conditions before transaction:
303 * Byte_Done, Failed, Bus_Err, Dev_Err, Intr, Host_Busy */
304 errmask = 0x9f;
305 } else {
306 /* Erroneous conditions during transaction:
307 * Failed, Bus_Err, Dev_Err, Intr */
308 errmask = 0x1e;
309 }
310 if (temp & errmask) {
311 dev_dbg(&I801_dev->dev, "SMBus busy (%02x). "
312 "Resetting...\n", temp);
313 outb_p(temp, SMBHSTSTS);
314 if (((temp = inb_p(SMBHSTSTS)) & errmask) != 0x00) {
315 dev_err(&I801_dev->dev,
316 "Reset failed! (%02x)\n", temp);
317 return -1;
318 }
319 if (i != 1)
320 /* if die in middle of block transaction, fail */
321 return -1;
322 }
323
324 if (i == 1) 333 if (i == 1)
325 outb_p(inb(SMBHSTCNT) | I801_START, SMBHSTCNT); 334 outb_p(inb(SMBHSTCNT) | I801_START, SMBHSTCNT);
326 335
@@ -328,41 +337,28 @@ static int i801_block_transaction_byte_by_byte(union i2c_smbus_data *data,
328 timeout = 0; 337 timeout = 0;
329 do { 338 do {
330 msleep(1); 339 msleep(1);
331 temp = inb_p(SMBHSTSTS); 340 status = inb_p(SMBHSTSTS);
332 } 341 }
333 while ((!(temp & SMBHSTSTS_BYTE_DONE)) 342 while ((!(status & SMBHSTSTS_BYTE_DONE))
334 && (timeout++ < MAX_TIMEOUT)); 343 && (timeout++ < MAX_TIMEOUT));
335 344
336 /* If the SMBus is still busy, we give up */ 345 result = i801_check_post(status, timeout >= MAX_TIMEOUT);
337 if (timeout >= MAX_TIMEOUT) { 346 if (result < 0)
338 /* try to stop the current command */ 347 return result;
339 dev_dbg(&I801_dev->dev, "Terminating the current "
340 "operation\n");
341 outb_p(inb_p(SMBHSTCNT) | SMBHSTCNT_KILL, SMBHSTCNT);
342 msleep(1);
343 outb_p(inb_p(SMBHSTCNT) & (~SMBHSTCNT_KILL),
344 SMBHSTCNT);
345 result = -1;
346 dev_dbg(&I801_dev->dev, "SMBus Timeout!\n");
347 }
348
349 if (temp & SMBHSTSTS_FAILED) {
350 result = -1;
351 dev_dbg(&I801_dev->dev,
352 "Error: Failed bus transaction\n");
353 } else if (temp & SMBHSTSTS_BUS_ERR) {
354 result = -1;
355 dev_err(&I801_dev->dev, "Bus collision!\n");
356 } else if (temp & SMBHSTSTS_DEV_ERR) {
357 result = -1;
358 dev_dbg(&I801_dev->dev, "Error: no response!\n");
359 }
360 348
361 if (i == 1 && read_write == I2C_SMBUS_READ 349 if (i == 1 && read_write == I2C_SMBUS_READ
362 && command != I2C_SMBUS_I2C_BLOCK_DATA) { 350 && command != I2C_SMBUS_I2C_BLOCK_DATA) {
363 len = inb_p(SMBHSTDAT0); 351 len = inb_p(SMBHSTDAT0);
364 if (len < 1 || len > I2C_SMBUS_BLOCK_MAX) 352 if (len < 1 || len > I2C_SMBUS_BLOCK_MAX) {
365 return -1; 353 dev_err(&I801_dev->dev,
354 "Illegal SMBus block read size %d\n",
355 len);
356 /* Recover */
357 while (inb_p(SMBHSTSTS) & SMBHSTSTS_HOST_BUSY)
358 outb_p(SMBHSTSTS_BYTE_DONE, SMBHSTSTS);
359 outb_p(SMBHSTSTS_INTR, SMBHSTSTS);
360 return -EPROTO;
361 }
366 data->block[0] = len; 362 data->block[0] = len;
367 } 363 }
368 364
@@ -371,30 +367,19 @@ static int i801_block_transaction_byte_by_byte(union i2c_smbus_data *data,
371 data->block[i] = inb_p(SMBBLKDAT); 367 data->block[i] = inb_p(SMBBLKDAT);
372 if (read_write == I2C_SMBUS_WRITE && i+1 <= len) 368 if (read_write == I2C_SMBUS_WRITE && i+1 <= len)
373 outb_p(data->block[i+1], SMBBLKDAT); 369 outb_p(data->block[i+1], SMBBLKDAT);
374 if ((temp & 0x9e) != 0x00)
375 outb_p(temp, SMBHSTSTS); /* signals SMBBLKDAT ready */
376
377 if ((temp = (0x1e & inb_p(SMBHSTSTS))) != 0x00) {
378 dev_dbg(&I801_dev->dev,
379 "Bad status (%02x) at end of transaction\n",
380 temp);
381 }
382 dev_dbg(&I801_dev->dev, "Block (post %d): CNT=%02x, CMD=%02x, "
383 "ADD=%02x, DAT0=%02x, DAT1=%02x, BLKDAT=%02x\n", i,
384 inb_p(SMBHSTCNT), inb_p(SMBHSTCMD), inb_p(SMBHSTADD),
385 inb_p(SMBHSTDAT0), inb_p(SMBHSTDAT1), inb_p(SMBBLKDAT));
386 370
387 if (result < 0) 371 /* signals SMBBLKDAT ready */
388 return result; 372 outb_p(SMBHSTSTS_BYTE_DONE | SMBHSTSTS_INTR, SMBHSTSTS);
389 } 373 }
390 return result; 374
375 return 0;
391} 376}
392 377
393static int i801_set_block_buffer_mode(void) 378static int i801_set_block_buffer_mode(void)
394{ 379{
395 outb_p(inb_p(SMBAUXCTL) | SMBAUXCTL_E32B, SMBAUXCTL); 380 outb_p(inb_p(SMBAUXCTL) | SMBAUXCTL_E32B, SMBAUXCTL);
396 if ((inb_p(SMBAUXCTL) & SMBAUXCTL_E32B) == 0) 381 if ((inb_p(SMBAUXCTL) & SMBAUXCTL_E32B) == 0)
397 return -1; 382 return -EIO;
398 return 0; 383 return 0;
399} 384}
400 385
@@ -414,7 +399,7 @@ static int i801_block_transaction(union i2c_smbus_data *data, char read_write,
414 } else if (!(i801_features & FEATURE_I2C_BLOCK_READ)) { 399 } else if (!(i801_features & FEATURE_I2C_BLOCK_READ)) {
415 dev_err(&I801_dev->dev, 400 dev_err(&I801_dev->dev,
416 "I2C block read is unsupported!\n"); 401 "I2C block read is unsupported!\n");
417 return -1; 402 return -EOPNOTSUPP;
418 } 403 }
419 } 404 }
420 405
@@ -449,7 +434,7 @@ static int i801_block_transaction(union i2c_smbus_data *data, char read_write,
449 return result; 434 return result;
450} 435}
451 436
452/* Return -1 on error. */ 437/* Return negative errno on error. */
453static s32 i801_access(struct i2c_adapter * adap, u16 addr, 438static s32 i801_access(struct i2c_adapter * adap, u16 addr,
454 unsigned short flags, char read_write, u8 command, 439 unsigned short flags, char read_write, u8 command,
455 int size, union i2c_smbus_data * data) 440 int size, union i2c_smbus_data * data)
@@ -511,10 +496,9 @@ static s32 i801_access(struct i2c_adapter * adap, u16 addr,
511 outb_p(command, SMBHSTCMD); 496 outb_p(command, SMBHSTCMD);
512 block = 1; 497 block = 1;
513 break; 498 break;
514 case I2C_SMBUS_PROC_CALL:
515 default: 499 default:
516 dev_err(&I801_dev->dev, "Unsupported transaction %d\n", size); 500 dev_err(&I801_dev->dev, "Unsupported transaction %d\n", size);
517 return -1; 501 return -EOPNOTSUPP;
518 } 502 }
519 503
520 if (hwpec) /* enable/disable hardware PEC */ 504 if (hwpec) /* enable/disable hardware PEC */
@@ -537,7 +521,7 @@ static s32 i801_access(struct i2c_adapter * adap, u16 addr,
537 if(block) 521 if(block)
538 return ret; 522 return ret;
539 if(ret) 523 if(ret)
540 return -1; 524 return ret;
541 if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK)) 525 if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK))
542 return 0; 526 return 0;
543 527
@@ -572,7 +556,7 @@ static const struct i2c_algorithm smbus_algorithm = {
572static struct i2c_adapter i801_adapter = { 556static struct i2c_adapter i801_adapter = {
573 .owner = THIS_MODULE, 557 .owner = THIS_MODULE,
574 .id = I2C_HW_SMBUS_I801, 558 .id = I2C_HW_SMBUS_I801,
575 .class = I2C_CLASS_HWMON, 559 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
576 .algo = &smbus_algorithm, 560 .algo = &smbus_algorithm,
577}; 561};
578 562
@@ -639,6 +623,10 @@ static int __devinit i801_probe(struct pci_dev *dev, const struct pci_device_id
639 goto exit; 623 goto exit;
640 } 624 }
641 625
626 err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
627 if (err)
628 goto exit;
629
642 err = pci_request_region(dev, SMBBAR, i801_driver.name); 630 err = pci_request_region(dev, SMBBAR, i801_driver.name);
643 if (err) { 631 if (err) {
644 dev_err(&dev->dev, "Failed to request SMBus region " 632 dev_err(&dev->dev, "Failed to request SMBus region "
diff --git a/drivers/i2c/busses/i2c-i810.c b/drivers/i2c/busses/i2c-i810.c
deleted file mode 100644
index 42e8d94c276f..000000000000
--- a/drivers/i2c/busses/i2c-i810.c
+++ /dev/null
@@ -1,260 +0,0 @@
1/*
2 i2c-i810.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (c) 1998, 1999, 2000 Frodo Looijaard <frodol@dds.nl>,
5 Philip Edelbrock <phil@netroedge.com>,
6 Ralph Metzler <rjkm@thp.uni-koeln.de>, and
7 Mark D. Studebaker <mdsxyz123@yahoo.com>
8
9 Based on code written by Ralph Metzler <rjkm@thp.uni-koeln.de> and
10 Simon Vogl
11
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25*/
26/*
27 This interfaces to the I810/I815 to provide access to
28 the DDC Bus and the I2C Bus.
29
30 SUPPORTED DEVICES PCI ID
31 i810AA 7121
32 i810AB 7123
33 i810E 7125
34 i815 1132
35 i845G 2562
36*/
37
38#include <linux/kernel.h>
39#include <linux/module.h>
40#include <linux/init.h>
41#include <linux/pci.h>
42#include <linux/i2c.h>
43#include <linux/i2c-algo-bit.h>
44#include <asm/io.h>
45
46/* GPIO register locations */
47#define I810_IOCONTROL_OFFSET 0x5000
48#define I810_HVSYNC 0x00 /* not used */
49#define I810_GPIOA 0x10
50#define I810_GPIOB 0x14
51
52/* bit locations in the registers */
53#define SCL_DIR_MASK 0x0001
54#define SCL_DIR 0x0002
55#define SCL_VAL_MASK 0x0004
56#define SCL_VAL_OUT 0x0008
57#define SCL_VAL_IN 0x0010
58#define SDA_DIR_MASK 0x0100
59#define SDA_DIR 0x0200
60#define SDA_VAL_MASK 0x0400
61#define SDA_VAL_OUT 0x0800
62#define SDA_VAL_IN 0x1000
63
64/* initialization states */
65#define INIT1 0x1
66#define INIT2 0x2
67#define INIT3 0x4
68
69/* delays */
70#define CYCLE_DELAY 10
71#define TIMEOUT (HZ / 2)
72
73static void __iomem *ioaddr;
74
75/* The i810 GPIO registers have individual masks for each bit
76 so we never have to read before writing. Nice. */
77
78static void bit_i810i2c_setscl(void *data, int val)
79{
80 writel((val ? SCL_VAL_OUT : 0) | SCL_DIR | SCL_DIR_MASK | SCL_VAL_MASK,
81 ioaddr + I810_GPIOB);
82 readl(ioaddr + I810_GPIOB); /* flush posted write */
83}
84
85static void bit_i810i2c_setsda(void *data, int val)
86{
87 writel((val ? SDA_VAL_OUT : 0) | SDA_DIR | SDA_DIR_MASK | SDA_VAL_MASK,
88 ioaddr + I810_GPIOB);
89 readl(ioaddr + I810_GPIOB); /* flush posted write */
90}
91
92/* The GPIO pins are open drain, so the pins could always remain outputs.
93 However, some chip versions don't latch the inputs unless they
94 are set as inputs.
95 We rely on the i2c-algo-bit routines to set the pins high before
96 reading the input from other chips. Following guidance in the 815
97 prog. ref. guide, we do a "dummy write" of 0 to the register before
98 reading which forces the input value to be latched. We presume this
99 applies to the 810 as well; shouldn't hurt anyway. This is necessary to get
100 i2c_algo_bit bit_test=1 to pass. */
101
102static int bit_i810i2c_getscl(void *data)
103{
104 writel(SCL_DIR_MASK, ioaddr + I810_GPIOB);
105 writel(0, ioaddr + I810_GPIOB);
106 return (0 != (readl(ioaddr + I810_GPIOB) & SCL_VAL_IN));
107}
108
109static int bit_i810i2c_getsda(void *data)
110{
111 writel(SDA_DIR_MASK, ioaddr + I810_GPIOB);
112 writel(0, ioaddr + I810_GPIOB);
113 return (0 != (readl(ioaddr + I810_GPIOB) & SDA_VAL_IN));
114}
115
116static void bit_i810ddc_setscl(void *data, int val)
117{
118 writel((val ? SCL_VAL_OUT : 0) | SCL_DIR | SCL_DIR_MASK | SCL_VAL_MASK,
119 ioaddr + I810_GPIOA);
120 readl(ioaddr + I810_GPIOA); /* flush posted write */
121}
122
123static void bit_i810ddc_setsda(void *data, int val)
124{
125 writel((val ? SDA_VAL_OUT : 0) | SDA_DIR | SDA_DIR_MASK | SDA_VAL_MASK,
126 ioaddr + I810_GPIOA);
127 readl(ioaddr + I810_GPIOA); /* flush posted write */
128}
129
130static int bit_i810ddc_getscl(void *data)
131{
132 writel(SCL_DIR_MASK, ioaddr + I810_GPIOA);
133 writel(0, ioaddr + I810_GPIOA);
134 return (0 != (readl(ioaddr + I810_GPIOA) & SCL_VAL_IN));
135}
136
137static int bit_i810ddc_getsda(void *data)
138{
139 writel(SDA_DIR_MASK, ioaddr + I810_GPIOA);
140 writel(0, ioaddr + I810_GPIOA);
141 return (0 != (readl(ioaddr + I810_GPIOA) & SDA_VAL_IN));
142}
143
144static int config_i810(struct pci_dev *dev)
145{
146 unsigned long cadr;
147
148 /* map I810 memory */
149 cadr = dev->resource[1].start;
150 cadr += I810_IOCONTROL_OFFSET;
151 cadr &= PCI_BASE_ADDRESS_MEM_MASK;
152 ioaddr = ioremap_nocache(cadr, 0x1000);
153 if (ioaddr) {
154 bit_i810i2c_setscl(NULL, 1);
155 bit_i810i2c_setsda(NULL, 1);
156 bit_i810ddc_setscl(NULL, 1);
157 bit_i810ddc_setsda(NULL, 1);
158 return 0;
159 }
160 return -ENODEV;
161}
162
163static struct i2c_algo_bit_data i810_i2c_bit_data = {
164 .setsda = bit_i810i2c_setsda,
165 .setscl = bit_i810i2c_setscl,
166 .getsda = bit_i810i2c_getsda,
167 .getscl = bit_i810i2c_getscl,
168 .udelay = CYCLE_DELAY,
169 .timeout = TIMEOUT,
170};
171
172static struct i2c_adapter i810_i2c_adapter = {
173 .owner = THIS_MODULE,
174 .id = I2C_HW_B_I810,
175 .name = "I810/I815 I2C Adapter",
176 .algo_data = &i810_i2c_bit_data,
177};
178
179static struct i2c_algo_bit_data i810_ddc_bit_data = {
180 .setsda = bit_i810ddc_setsda,
181 .setscl = bit_i810ddc_setscl,
182 .getsda = bit_i810ddc_getsda,
183 .getscl = bit_i810ddc_getscl,
184 .udelay = CYCLE_DELAY,
185 .timeout = TIMEOUT,
186};
187
188static struct i2c_adapter i810_ddc_adapter = {
189 .owner = THIS_MODULE,
190 .id = I2C_HW_B_I810,
191 .name = "I810/I815 DDC Adapter",
192 .algo_data = &i810_ddc_bit_data,
193};
194
195static struct pci_device_id i810_ids[] __devinitdata = {
196 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG1) },
197 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810_IG3) },
198 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82810E_IG) },
199 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_CGC) },
200 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82845G_IG) },
201 { 0, },
202};
203
204MODULE_DEVICE_TABLE (pci, i810_ids);
205
206static int __devinit i810_probe(struct pci_dev *dev, const struct pci_device_id *id)
207{
208 int retval;
209
210 retval = config_i810(dev);
211 if (retval)
212 return retval;
213 dev_info(&dev->dev, "i810/i815 i2c device found.\n");
214
215 /* set up the sysfs linkage to our parent device */
216 i810_i2c_adapter.dev.parent = &dev->dev;
217 i810_ddc_adapter.dev.parent = &dev->dev;
218
219 retval = i2c_bit_add_bus(&i810_i2c_adapter);
220 if (retval)
221 return retval;
222 retval = i2c_bit_add_bus(&i810_ddc_adapter);
223 if (retval)
224 i2c_del_adapter(&i810_i2c_adapter);
225 return retval;
226}
227
228static void __devexit i810_remove(struct pci_dev *dev)
229{
230 i2c_del_adapter(&i810_ddc_adapter);
231 i2c_del_adapter(&i810_i2c_adapter);
232 iounmap(ioaddr);
233}
234
235static struct pci_driver i810_driver = {
236 .name = "i810_smbus",
237 .id_table = i810_ids,
238 .probe = i810_probe,
239 .remove = __devexit_p(i810_remove),
240};
241
242static int __init i2c_i810_init(void)
243{
244 return pci_register_driver(&i810_driver);
245}
246
247static void __exit i2c_i810_exit(void)
248{
249 pci_unregister_driver(&i810_driver);
250}
251
252MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>, "
253 "Philip Edelbrock <phil@netroedge.com>, "
254 "Ralph Metzler <rjkm@thp.uni-koeln.de>, "
255 "and Mark D. Studebaker <mdsxyz123@yahoo.com>");
256MODULE_DESCRIPTION("I810/I815 I2C/DDC driver");
257MODULE_LICENSE("GPL");
258
259module_init(i2c_i810_init);
260module_exit(i2c_i810_exit);
diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c
index 85dbf34382e1..651f2f1ae5b7 100644
--- a/drivers/i2c/busses/i2c-ibm_iic.c
+++ b/drivers/i2c/busses/i2c-ibm_iic.c
@@ -42,13 +42,8 @@
42#include <asm/io.h> 42#include <asm/io.h>
43#include <linux/i2c.h> 43#include <linux/i2c.h>
44#include <linux/i2c-id.h> 44#include <linux/i2c-id.h>
45
46#ifdef CONFIG_IBM_OCP
47#include <asm/ocp.h>
48#include <asm/ibm4xx.h>
49#else
50#include <linux/of_platform.h> 45#include <linux/of_platform.h>
51#endif 46#include <linux/of_i2c.h>
52 47
53#include "i2c-ibm_iic.h" 48#include "i2c-ibm_iic.h"
54 49
@@ -665,180 +660,6 @@ static inline u8 iic_clckdiv(unsigned int opb)
665 return (u8)((opb + 9) / 10 - 1); 660 return (u8)((opb + 9) / 10 - 1);
666} 661}
667 662
668#ifdef CONFIG_IBM_OCP
669/*
670 * Register single IIC interface
671 */
672static int __devinit iic_probe(struct ocp_device *ocp){
673
674 struct ibm_iic_private* dev;
675 struct i2c_adapter* adap;
676 struct ocp_func_iic_data* iic_data = ocp->def->additions;
677 int ret;
678
679 if (!iic_data)
680 printk(KERN_WARNING"ibm-iic%d: missing additional data!\n",
681 ocp->def->index);
682
683 if (!(dev = kzalloc(sizeof(*dev), GFP_KERNEL))) {
684 printk(KERN_ERR "ibm-iic%d: failed to allocate device data\n",
685 ocp->def->index);
686 return -ENOMEM;
687 }
688
689 dev->idx = ocp->def->index;
690 ocp_set_drvdata(ocp, dev);
691
692 if (!request_mem_region(ocp->def->paddr, sizeof(struct iic_regs),
693 "ibm_iic")) {
694 ret = -EBUSY;
695 goto fail1;
696 }
697
698 if (!(dev->vaddr = ioremap(ocp->def->paddr, sizeof(struct iic_regs)))){
699 printk(KERN_ERR "ibm-iic%d: failed to ioremap device registers\n",
700 dev->idx);
701 ret = -ENXIO;
702 goto fail2;
703 }
704
705 init_waitqueue_head(&dev->wq);
706
707 dev->irq = iic_force_poll ? -1 : ocp->def->irq;
708 if (dev->irq >= 0){
709 /* Disable interrupts until we finish initialization,
710 assumes level-sensitive IRQ setup...
711 */
712 iic_interrupt_mode(dev, 0);
713 if (request_irq(dev->irq, iic_handler, 0, "IBM IIC", dev)){
714 printk(KERN_ERR "ibm-iic%d: request_irq %d failed\n",
715 dev->idx, dev->irq);
716 /* Fallback to the polling mode */
717 dev->irq = -1;
718 }
719 }
720
721 if (dev->irq < 0)
722 printk(KERN_WARNING "ibm-iic%d: using polling mode\n",
723 dev->idx);
724
725 /* Board specific settings */
726 dev->fast_mode = iic_force_fast ? 1 : (iic_data ? iic_data->fast_mode : 0);
727
728 /* clckdiv is the same for *all* IIC interfaces,
729 * but I'd rather make a copy than introduce another global. --ebs
730 */
731 dev->clckdiv = iic_clckdiv(ocp_sys_info.opb_bus_freq);
732 DBG("%d: clckdiv = %d\n", dev->idx, dev->clckdiv);
733
734 /* Initialize IIC interface */
735 iic_dev_init(dev);
736
737 /* Register it with i2c layer */
738 adap = &dev->adap;
739 adap->dev.parent = &ocp->dev;
740 strcpy(adap->name, "IBM IIC");
741 i2c_set_adapdata(adap, dev);
742 adap->id = I2C_HW_OCP;
743 adap->class = I2C_CLASS_HWMON;
744 adap->algo = &iic_algo;
745 adap->client_register = NULL;
746 adap->client_unregister = NULL;
747 adap->timeout = 1;
748
749 /*
750 * If "dev->idx" is negative we consider it as zero.
751 * The reason to do so is to avoid sysfs names that only make
752 * sense when there are multiple adapters.
753 */
754 adap->nr = dev->idx >= 0 ? dev->idx : 0;
755
756 if ((ret = i2c_add_numbered_adapter(adap)) < 0) {
757 printk(KERN_ERR "ibm-iic%d: failed to register i2c adapter\n",
758 dev->idx);
759 goto fail;
760 }
761
762 printk(KERN_INFO "ibm-iic%d: using %s mode\n", dev->idx,
763 dev->fast_mode ? "fast (400 kHz)" : "standard (100 kHz)");
764
765 return 0;
766
767fail:
768 if (dev->irq >= 0){
769 iic_interrupt_mode(dev, 0);
770 free_irq(dev->irq, dev);
771 }
772
773 iounmap(dev->vaddr);
774fail2:
775 release_mem_region(ocp->def->paddr, sizeof(struct iic_regs));
776fail1:
777 ocp_set_drvdata(ocp, NULL);
778 kfree(dev);
779 return ret;
780}
781
782/*
783 * Cleanup initialized IIC interface
784 */
785static void __devexit iic_remove(struct ocp_device *ocp)
786{
787 struct ibm_iic_private* dev = (struct ibm_iic_private*)ocp_get_drvdata(ocp);
788 BUG_ON(dev == NULL);
789 if (i2c_del_adapter(&dev->adap)){
790 printk(KERN_ERR "ibm-iic%d: failed to delete i2c adapter :(\n",
791 dev->idx);
792 /* That's *very* bad, just shutdown IRQ ... */
793 if (dev->irq >= 0){
794 iic_interrupt_mode(dev, 0);
795 free_irq(dev->irq, dev);
796 dev->irq = -1;
797 }
798 } else {
799 if (dev->irq >= 0){
800 iic_interrupt_mode(dev, 0);
801 free_irq(dev->irq, dev);
802 }
803 iounmap(dev->vaddr);
804 release_mem_region(ocp->def->paddr, sizeof(struct iic_regs));
805 kfree(dev);
806 }
807}
808
809static struct ocp_device_id ibm_iic_ids[] __devinitdata =
810{
811 { .vendor = OCP_VENDOR_IBM, .function = OCP_FUNC_IIC },
812 { .vendor = OCP_VENDOR_INVALID }
813};
814
815MODULE_DEVICE_TABLE(ocp, ibm_iic_ids);
816
817static struct ocp_driver ibm_iic_driver =
818{
819 .name = "iic",
820 .id_table = ibm_iic_ids,
821 .probe = iic_probe,
822 .remove = __devexit_p(iic_remove),
823#if defined(CONFIG_PM)
824 .suspend = NULL,
825 .resume = NULL,
826#endif
827};
828
829static int __init iic_init(void)
830{
831 printk(KERN_INFO "IBM IIC driver v" DRIVER_VERSION "\n");
832 return ocp_register_driver(&ibm_iic_driver);
833}
834
835static void __exit iic_exit(void)
836{
837 ocp_unregister_driver(&ibm_iic_driver);
838}
839
840#else /* !CONFIG_IBM_OCP */
841
842static int __devinit iic_request_irq(struct of_device *ofdev, 663static int __devinit iic_request_irq(struct of_device *ofdev,
843 struct ibm_iic_private *dev) 664 struct ibm_iic_private *dev)
844{ 665{
@@ -876,7 +697,7 @@ static int __devinit iic_probe(struct of_device *ofdev,
876 struct device_node *np = ofdev->node; 697 struct device_node *np = ofdev->node;
877 struct ibm_iic_private *dev; 698 struct ibm_iic_private *dev;
878 struct i2c_adapter *adap; 699 struct i2c_adapter *adap;
879 const u32 *indexp, *freq; 700 const u32 *freq;
880 int ret; 701 int ret;
881 702
882 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 703 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -887,14 +708,6 @@ static int __devinit iic_probe(struct of_device *ofdev,
887 708
888 dev_set_drvdata(&ofdev->dev, dev); 709 dev_set_drvdata(&ofdev->dev, dev);
889 710
890 indexp = of_get_property(np, "index", NULL);
891 if (!indexp) {
892 dev_err(&ofdev->dev, "no index specified\n");
893 ret = -EINVAL;
894 goto error_cleanup;
895 }
896 dev->idx = *indexp;
897
898 dev->vaddr = of_iomap(np, 0); 711 dev->vaddr = of_iomap(np, 0);
899 if (dev->vaddr == NULL) { 712 if (dev->vaddr == NULL) {
900 dev_err(&ofdev->dev, "failed to iomap device\n"); 713 dev_err(&ofdev->dev, "failed to iomap device\n");
@@ -934,17 +747,19 @@ static int __devinit iic_probe(struct of_device *ofdev,
934 strlcpy(adap->name, "IBM IIC", sizeof(adap->name)); 747 strlcpy(adap->name, "IBM IIC", sizeof(adap->name));
935 i2c_set_adapdata(adap, dev); 748 i2c_set_adapdata(adap, dev);
936 adap->id = I2C_HW_OCP; 749 adap->id = I2C_HW_OCP;
937 adap->class = I2C_CLASS_HWMON; 750 adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
938 adap->algo = &iic_algo; 751 adap->algo = &iic_algo;
939 adap->timeout = 1; 752 adap->timeout = 1;
940 adap->nr = dev->idx;
941 753
942 ret = i2c_add_numbered_adapter(adap); 754 ret = i2c_add_adapter(adap);
943 if (ret < 0) { 755 if (ret < 0) {
944 dev_err(&ofdev->dev, "failed to register i2c adapter\n"); 756 dev_err(&ofdev->dev, "failed to register i2c adapter\n");
945 goto error_cleanup; 757 goto error_cleanup;
946 } 758 }
947 759
760 /* Now register all the child nodes */
761 of_register_i2c_devices(adap, np);
762
948 dev_info(&ofdev->dev, "using %s mode\n", 763 dev_info(&ofdev->dev, "using %s mode\n",
949 dev->fast_mode ? "fast (400 kHz)" : "standard (100 kHz)"); 764 dev->fast_mode ? "fast (400 kHz)" : "standard (100 kHz)");
950 765
@@ -987,11 +802,7 @@ static int __devexit iic_remove(struct of_device *ofdev)
987} 802}
988 803
989static const struct of_device_id ibm_iic_match[] = { 804static const struct of_device_id ibm_iic_match[] = {
990 { .compatible = "ibm,iic-405ex", }, 805 { .compatible = "ibm,iic", },
991 { .compatible = "ibm,iic-405gp", },
992 { .compatible = "ibm,iic-440gp", },
993 { .compatible = "ibm,iic-440gpx", },
994 { .compatible = "ibm,iic-440grx", },
995 {} 806 {}
996}; 807};
997 808
@@ -1011,7 +822,6 @@ static void __exit iic_exit(void)
1011{ 822{
1012 of_unregister_platform_driver(&ibm_iic_driver); 823 of_unregister_platform_driver(&ibm_iic_driver);
1013} 824}
1014#endif /* CONFIG_IBM_OCP */
1015 825
1016module_init(iic_init); 826module_init(iic_init);
1017module_exit(iic_exit); 827module_exit(iic_exit);
diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c
index 39884e797594..fc2714ac0c0f 100644
--- a/drivers/i2c/busses/i2c-iop3xx.c
+++ b/drivers/i2c/busses/i2c-iop3xx.c
@@ -482,7 +482,7 @@ iop3xx_i2c_probe(struct platform_device *pdev)
482 memcpy(new_adapter->name, pdev->name, strlen(pdev->name)); 482 memcpy(new_adapter->name, pdev->name, strlen(pdev->name));
483 new_adapter->id = I2C_HW_IOP3XX; 483 new_adapter->id = I2C_HW_IOP3XX;
484 new_adapter->owner = THIS_MODULE; 484 new_adapter->owner = THIS_MODULE;
485 new_adapter->class = I2C_CLASS_HWMON; 485 new_adapter->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
486 new_adapter->dev.parent = &pdev->dev; 486 new_adapter->dev.parent = &pdev->dev;
487 new_adapter->nr = pdev->id; 487 new_adapter->nr = pdev->id;
488 488
diff --git a/drivers/i2c/busses/i2c-isch.c b/drivers/i2c/busses/i2c-isch.c
new file mode 100644
index 000000000000..b9c01aa90036
--- /dev/null
+++ b/drivers/i2c/busses/i2c-isch.c
@@ -0,0 +1,339 @@
1/*
2 i2c-isch.c - Linux kernel driver for Intel SCH chipset SMBus
3 - Based on i2c-piix4.c
4 Copyright (c) 1998 - 2002 Frodo Looijaard <frodol@dds.nl> and
5 Philip Edelbrock <phil@netroedge.com>
6 - Intel SCH support
7 Copyright (c) 2007 - 2008 Jacob Jun Pan <jacob.jun.pan@intel.com>
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License version 2 as
11 published by the Free Software Foundation.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21*/
22
23/*
24 Supports:
25 Intel SCH chipsets (AF82US15W, AF82US15L, AF82UL11L)
26 Note: we assume there can only be one device, with one SMBus interface.
27*/
28
29#include <linux/module.h>
30#include <linux/pci.h>
31#include <linux/kernel.h>
32#include <linux/delay.h>
33#include <linux/stddef.h>
34#include <linux/ioport.h>
35#include <linux/i2c.h>
36#include <linux/init.h>
37#include <linux/io.h>
38#include <linux/acpi.h>
39
40/* SCH SMBus address offsets */
41#define SMBHSTCNT (0 + sch_smba)
42#define SMBHSTSTS (1 + sch_smba)
43#define SMBHSTADD (4 + sch_smba) /* TSA */
44#define SMBHSTCMD (5 + sch_smba)
45#define SMBHSTDAT0 (6 + sch_smba)
46#define SMBHSTDAT1 (7 + sch_smba)
47#define SMBBLKDAT (0x20 + sch_smba)
48
49/* count for request_region */
50#define SMBIOSIZE 64
51
52/* PCI Address Constants */
53#define SMBBA_SCH 0x40
54
55/* Other settings */
56#define MAX_TIMEOUT 500
57
58/* I2C constants */
59#define SCH_QUICK 0x00
60#define SCH_BYTE 0x01
61#define SCH_BYTE_DATA 0x02
62#define SCH_WORD_DATA 0x03
63#define SCH_BLOCK_DATA 0x05
64
65static unsigned short sch_smba;
66static struct pci_driver sch_driver;
67static struct i2c_adapter sch_adapter;
68
69/*
70 * Start the i2c transaction -- the i2c_access will prepare the transaction
71 * and this function will execute it.
72 * return 0 for success and others for failure.
73 */
74static int sch_transaction(void)
75{
76 int temp;
77 int result = 0;
78 int timeout = 0;
79
80 dev_dbg(&sch_adapter.dev, "Transaction (pre): CNT=%02x, CMD=%02x, "
81 "ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb(SMBHSTCNT),
82 inb(SMBHSTCMD), inb(SMBHSTADD), inb(SMBHSTDAT0),
83 inb(SMBHSTDAT1));
84
85 /* Make sure the SMBus host is ready to start transmitting */
86 temp = inb(SMBHSTSTS) & 0x0f;
87 if (temp) {
88 /* Can not be busy since we checked it in sch_access */
89 if (temp & 0x01) {
90 dev_dbg(&sch_adapter.dev, "Completion (%02x). "
91 "Clear...\n", temp);
92 }
93 if (temp & 0x06) {
94 dev_dbg(&sch_adapter.dev, "SMBus error (%02x). "
95 "Resetting...\n", temp);
96 }
97 outb(temp, SMBHSTSTS);
98 temp = inb(SMBHSTSTS) & 0x0f;
99 if (temp) {
100 dev_err(&sch_adapter.dev,
101 "SMBus is not ready: (%02x)\n", temp);
102 return -EAGAIN;
103 }
104 }
105
106 /* start the transaction by setting bit 4 */
107 outb(inb(SMBHSTCNT) | 0x10, SMBHSTCNT);
108
109 do {
110 msleep(1);
111 temp = inb(SMBHSTSTS) & 0x0f;
112 } while ((temp & 0x08) && (timeout++ < MAX_TIMEOUT));
113
114 /* If the SMBus is still busy, we give up */
115 if (timeout >= MAX_TIMEOUT) {
116 dev_err(&sch_adapter.dev, "SMBus Timeout!\n");
117 result = -ETIMEDOUT;
118 }
119 if (temp & 0x04) {
120 result = -EIO;
121 dev_dbg(&sch_adapter.dev, "Bus collision! SMBus may be "
122 "locked until next hard reset. (sorry!)\n");
123 /* Clock stops and slave is stuck in mid-transmission */
124 } else if (temp & 0x02) {
125 result = -EIO;
126 dev_err(&sch_adapter.dev, "Error: no response!\n");
127 } else if (temp & 0x01) {
128 dev_dbg(&sch_adapter.dev, "Post complete!\n");
129 outb(temp, SMBHSTSTS);
130 temp = inb(SMBHSTSTS) & 0x07;
131 if (temp & 0x06) {
132 /* Completion clear failed */
133 dev_dbg(&sch_adapter.dev, "Failed reset at end of "
134 "transaction (%02x), Bus error!\n", temp);
135 }
136 } else {
137 result = -ENXIO;
138 dev_dbg(&sch_adapter.dev, "No such address.\n");
139 }
140 dev_dbg(&sch_adapter.dev, "Transaction (post): CNT=%02x, CMD=%02x, "
141 "ADD=%02x, DAT0=%02x, DAT1=%02x\n", inb(SMBHSTCNT),
142 inb(SMBHSTCMD), inb(SMBHSTADD), inb(SMBHSTDAT0),
143 inb(SMBHSTDAT1));
144 return result;
145}
146
147/*
148 * This is the main access entry for i2c-sch access
149 * adap is i2c_adapter pointer, addr is the i2c device bus address, read_write
150 * (0 for read and 1 for write), size is i2c transaction type and data is the
151 * union of transaction for data to be transfered or data read from bus.
152 * return 0 for success and others for failure.
153 */
154static s32 sch_access(struct i2c_adapter *adap, u16 addr,
155 unsigned short flags, char read_write,
156 u8 command, int size, union i2c_smbus_data *data)
157{
158 int i, len, temp, rc;
159
160 /* Make sure the SMBus host is not busy */
161 temp = inb(SMBHSTSTS) & 0x0f;
162 if (temp & 0x08) {
163 dev_dbg(&sch_adapter.dev, "SMBus busy (%02x)\n", temp);
164 return -EAGAIN;
165 }
166 dev_dbg(&sch_adapter.dev, "access size: %d %s\n", size,
167 (read_write)?"READ":"WRITE");
168 switch (size) {
169 case I2C_SMBUS_QUICK:
170 outb((addr << 1) | read_write, SMBHSTADD);
171 size = SCH_QUICK;
172 break;
173 case I2C_SMBUS_BYTE:
174 outb((addr << 1) | read_write, SMBHSTADD);
175 if (read_write == I2C_SMBUS_WRITE)
176 outb(command, SMBHSTCMD);
177 size = SCH_BYTE;
178 break;
179 case I2C_SMBUS_BYTE_DATA:
180 outb((addr << 1) | read_write, SMBHSTADD);
181 outb(command, SMBHSTCMD);
182 if (read_write == I2C_SMBUS_WRITE)
183 outb(data->byte, SMBHSTDAT0);
184 size = SCH_BYTE_DATA;
185 break;
186 case I2C_SMBUS_WORD_DATA:
187 outb((addr << 1) | read_write, SMBHSTADD);
188 outb(command, SMBHSTCMD);
189 if (read_write == I2C_SMBUS_WRITE) {
190 outb(data->word & 0xff, SMBHSTDAT0);
191 outb((data->word & 0xff00) >> 8, SMBHSTDAT1);
192 }
193 size = SCH_WORD_DATA;
194 break;
195 case I2C_SMBUS_BLOCK_DATA:
196 outb((addr << 1) | read_write, SMBHSTADD);
197 outb(command, SMBHSTCMD);
198 if (read_write == I2C_SMBUS_WRITE) {
199 len = data->block[0];
200 if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
201 return -EINVAL;
202 outb(len, SMBHSTDAT0);
203 for (i = 1; i <= len; i++)
204 outb(data->block[i], SMBBLKDAT+i-1);
205 }
206 size = SCH_BLOCK_DATA;
207 break;
208 default:
209 dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
210 return -EOPNOTSUPP;
211 }
212 dev_dbg(&sch_adapter.dev, "write size %d to 0x%04x\n", size, SMBHSTCNT);
213 outb((inb(SMBHSTCNT) & 0xb0) | (size & 0x7), SMBHSTCNT);
214
215 rc = sch_transaction();
216 if (rc) /* Error in transaction */
217 return rc;
218
219 if ((read_write == I2C_SMBUS_WRITE) || (size == SCH_QUICK))
220 return 0;
221
222 switch (size) {
223 case SCH_BYTE:
224 case SCH_BYTE_DATA:
225 data->byte = inb(SMBHSTDAT0);
226 break;
227 case SCH_WORD_DATA:
228 data->word = inb(SMBHSTDAT0) + (inb(SMBHSTDAT1) << 8);
229 break;
230 case SCH_BLOCK_DATA:
231 data->block[0] = inb(SMBHSTDAT0);
232 if (data->block[0] == 0 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
233 return -EPROTO;
234 for (i = 1; i <= data->block[0]; i++)
235 data->block[i] = inb(SMBBLKDAT+i-1);
236 break;
237 }
238 return 0;
239}
240
241static u32 sch_func(struct i2c_adapter *adapter)
242{
243 return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
244 I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
245 I2C_FUNC_SMBUS_BLOCK_DATA;
246}
247
248static const struct i2c_algorithm smbus_algorithm = {
249 .smbus_xfer = sch_access,
250 .functionality = sch_func,
251};
252
253static struct i2c_adapter sch_adapter = {
254 .owner = THIS_MODULE,
255 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
256 .algo = &smbus_algorithm,
257};
258
259static struct pci_device_id sch_ids[] = {
260 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SCH_LPC) },
261 { 0, }
262};
263
264MODULE_DEVICE_TABLE(pci, sch_ids);
265
266static int __devinit sch_probe(struct pci_dev *dev,
267 const struct pci_device_id *id)
268{
269 int retval;
270 unsigned int smba;
271
272 pci_read_config_dword(dev, SMBBA_SCH, &smba);
273 if (!(smba & (1 << 31))) {
274 dev_err(&dev->dev, "SMBus I/O space disabled!\n");
275 return -ENODEV;
276 }
277
278 sch_smba = (unsigned short)smba;
279 if (sch_smba == 0) {
280 dev_err(&dev->dev, "SMBus base address uninitialized!\n");
281 return -ENODEV;
282 }
283 if (acpi_check_region(sch_smba, SMBIOSIZE, sch_driver.name))
284 return -EBUSY;
285 if (!request_region(sch_smba, SMBIOSIZE, sch_driver.name)) {
286 dev_err(&dev->dev, "SMBus region 0x%x already in use!\n",
287 sch_smba);
288 return -EBUSY;
289 }
290 dev_dbg(&dev->dev, "SMBA = 0x%X\n", sch_smba);
291
292 /* set up the sysfs linkage to our parent device */
293 sch_adapter.dev.parent = &dev->dev;
294
295 snprintf(sch_adapter.name, sizeof(sch_adapter.name),
296 "SMBus SCH adapter at %04x", sch_smba);
297
298 retval = i2c_add_adapter(&sch_adapter);
299 if (retval) {
300 dev_err(&dev->dev, "Couldn't register adapter!\n");
301 release_region(sch_smba, SMBIOSIZE);
302 sch_smba = 0;
303 }
304
305 return retval;
306}
307
308static void __devexit sch_remove(struct pci_dev *dev)
309{
310 if (sch_smba) {
311 i2c_del_adapter(&sch_adapter);
312 release_region(sch_smba, SMBIOSIZE);
313 sch_smba = 0;
314 }
315}
316
317static struct pci_driver sch_driver = {
318 .name = "isch_smbus",
319 .id_table = sch_ids,
320 .probe = sch_probe,
321 .remove = __devexit_p(sch_remove),
322};
323
324static int __init i2c_sch_init(void)
325{
326 return pci_register_driver(&sch_driver);
327}
328
329static void __exit i2c_sch_exit(void)
330{
331 pci_unregister_driver(&sch_driver);
332}
333
334MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>");
335MODULE_DESCRIPTION("Intel SCH SMBus driver");
336MODULE_LICENSE("GPL");
337
338module_init(i2c_sch_init);
339module_exit(i2c_sch_exit);
diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c
index a076129de7e8..10b9342a36c2 100644
--- a/drivers/i2c/busses/i2c-mpc.c
+++ b/drivers/i2c/busses/i2c-mpc.c
@@ -311,7 +311,7 @@ static struct i2c_adapter mpc_ops = {
311 .name = "MPC adapter", 311 .name = "MPC adapter",
312 .id = I2C_HW_MPC107, 312 .id = I2C_HW_MPC107,
313 .algo = &mpc_algo, 313 .algo = &mpc_algo,
314 .class = I2C_CLASS_HWMON, 314 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
315 .timeout = 1, 315 .timeout = 1,
316}; 316};
317 317
diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index 036e6a883e67..9e8118d2fe64 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -530,7 +530,7 @@ mv64xxx_i2c_probe(struct platform_device *pd)
530 drv_data->adapter.id = I2C_HW_MV64XXX; 530 drv_data->adapter.id = I2C_HW_MV64XXX;
531 drv_data->adapter.algo = &mv64xxx_i2c_algo; 531 drv_data->adapter.algo = &mv64xxx_i2c_algo;
532 drv_data->adapter.owner = THIS_MODULE; 532 drv_data->adapter.owner = THIS_MODULE;
533 drv_data->adapter.class = I2C_CLASS_HWMON; 533 drv_data->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
534 drv_data->adapter.timeout = pdata->timeout; 534 drv_data->adapter.timeout = pdata->timeout;
535 drv_data->adapter.nr = pd->id; 535 drv_data->adapter.nr = pd->id;
536 platform_set_drvdata(pd, drv_data); 536 platform_set_drvdata(pd, drv_data);
diff --git a/drivers/i2c/busses/i2c-nforce2-s4985.c b/drivers/i2c/busses/i2c-nforce2-s4985.c
new file mode 100644
index 000000000000..6a8995dfd0bb
--- /dev/null
+++ b/drivers/i2c/busses/i2c-nforce2-s4985.c
@@ -0,0 +1,257 @@
1/*
2 * i2c-nforce2-s4985.c - i2c-nforce2 extras for the Tyan S4985 motherboard
3 *
4 * Copyright (C) 2008 Jean Delvare <khali@linux-fr.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21/*
22 * We select the channels by sending commands to the Philips
23 * PCA9556 chip at I2C address 0x18. The main adapter is used for
24 * the non-multiplexed part of the bus, and 4 virtual adapters
25 * are defined for the multiplexed addresses: 0x50-0x53 (memory
26 * module EEPROM) located on channels 1-4. We define one virtual
27 * adapter per CPU, which corresponds to one multiplexed channel:
28 * CPU0: virtual adapter 1, channel 1
29 * CPU1: virtual adapter 2, channel 2
30 * CPU2: virtual adapter 3, channel 3
31 * CPU3: virtual adapter 4, channel 4
32 */
33
34#include <linux/module.h>
35#include <linux/kernel.h>
36#include <linux/slab.h>
37#include <linux/init.h>
38#include <linux/i2c.h>
39#include <linux/mutex.h>
40
41extern struct i2c_adapter *nforce2_smbus;
42
43static struct i2c_adapter *s4985_adapter;
44static struct i2c_algorithm *s4985_algo;
45
46/* Wrapper access functions for multiplexed SMBus */
47static DEFINE_MUTEX(nforce2_lock);
48
49static s32 nforce2_access_virt0(struct i2c_adapter *adap, u16 addr,
50 unsigned short flags, char read_write,
51 u8 command, int size,
52 union i2c_smbus_data *data)
53{
54 int error;
55
56 /* We exclude the multiplexed addresses */
57 if ((addr & 0xfc) == 0x50 || (addr & 0xfc) == 0x30
58 || addr == 0x18)
59 return -ENXIO;
60
61 mutex_lock(&nforce2_lock);
62 error = nforce2_smbus->algo->smbus_xfer(adap, addr, flags, read_write,
63 command, size, data);
64 mutex_unlock(&nforce2_lock);
65
66 return error;
67}
68
69/* We remember the last used channels combination so as to only switch
70 channels when it is really needed. This greatly reduces the SMBus
71 overhead, but also assumes that nobody will be writing to the PCA9556
72 in our back. */
73static u8 last_channels;
74
75static inline s32 nforce2_access_channel(struct i2c_adapter *adap, u16 addr,
76 unsigned short flags, char read_write,
77 u8 command, int size,
78 union i2c_smbus_data *data,
79 u8 channels)
80{
81 int error;
82
83 /* We exclude the non-multiplexed addresses */
84 if ((addr & 0xfc) != 0x50 && (addr & 0xfc) != 0x30)
85 return -ENXIO;
86
87 mutex_lock(&nforce2_lock);
88 if (last_channels != channels) {
89 union i2c_smbus_data mplxdata;
90 mplxdata.byte = channels;
91
92 error = nforce2_smbus->algo->smbus_xfer(adap, 0x18, 0,
93 I2C_SMBUS_WRITE, 0x01,
94 I2C_SMBUS_BYTE_DATA,
95 &mplxdata);
96 if (error)
97 goto UNLOCK;
98 last_channels = channels;
99 }
100 error = nforce2_smbus->algo->smbus_xfer(adap, addr, flags, read_write,
101 command, size, data);
102
103UNLOCK:
104 mutex_unlock(&nforce2_lock);
105 return error;
106}
107
108static s32 nforce2_access_virt1(struct i2c_adapter *adap, u16 addr,
109 unsigned short flags, char read_write,
110 u8 command, int size,
111 union i2c_smbus_data *data)
112{
113 /* CPU0: channel 1 enabled */
114 return nforce2_access_channel(adap, addr, flags, read_write, command,
115 size, data, 0x02);
116}
117
118static s32 nforce2_access_virt2(struct i2c_adapter *adap, u16 addr,
119 unsigned short flags, char read_write,
120 u8 command, int size,
121 union i2c_smbus_data *data)
122{
123 /* CPU1: channel 2 enabled */
124 return nforce2_access_channel(adap, addr, flags, read_write, command,
125 size, data, 0x04);
126}
127
128static s32 nforce2_access_virt3(struct i2c_adapter *adap, u16 addr,
129 unsigned short flags, char read_write,
130 u8 command, int size,
131 union i2c_smbus_data *data)
132{
133 /* CPU2: channel 3 enabled */
134 return nforce2_access_channel(adap, addr, flags, read_write, command,
135 size, data, 0x08);
136}
137
138static s32 nforce2_access_virt4(struct i2c_adapter *adap, u16 addr,
139 unsigned short flags, char read_write,
140 u8 command, int size,
141 union i2c_smbus_data *data)
142{
143 /* CPU3: channel 4 enabled */
144 return nforce2_access_channel(adap, addr, flags, read_write, command,
145 size, data, 0x10);
146}
147
148static int __init nforce2_s4985_init(void)
149{
150 int i, error;
151 union i2c_smbus_data ioconfig;
152
153 /* Unregister physical bus */
154 if (!nforce2_smbus)
155 return -ENODEV;
156 error = i2c_del_adapter(nforce2_smbus);
157 if (error) {
158 dev_err(&nforce2_smbus->dev, "Physical bus removal failed\n");
159 goto ERROR0;
160 }
161
162 printk(KERN_INFO "Enabling SMBus multiplexing for Tyan S4985\n");
163 /* Define the 5 virtual adapters and algorithms structures */
164 s4985_adapter = kzalloc(5 * sizeof(struct i2c_adapter), GFP_KERNEL);
165 if (!s4985_adapter) {
166 error = -ENOMEM;
167 goto ERROR1;
168 }
169 s4985_algo = kzalloc(5 * sizeof(struct i2c_algorithm), GFP_KERNEL);
170 if (!s4985_algo) {
171 error = -ENOMEM;
172 goto ERROR2;
173 }
174
175 /* Fill in the new structures */
176 s4985_algo[0] = *(nforce2_smbus->algo);
177 s4985_algo[0].smbus_xfer = nforce2_access_virt0;
178 s4985_adapter[0] = *nforce2_smbus;
179 s4985_adapter[0].algo = s4985_algo;
180 s4985_adapter[0].dev.parent = nforce2_smbus->dev.parent;
181 for (i = 1; i < 5; i++) {
182 s4985_algo[i] = *(nforce2_smbus->algo);
183 s4985_adapter[i] = *nforce2_smbus;
184 snprintf(s4985_adapter[i].name, sizeof(s4985_adapter[i].name),
185 "SMBus nForce2 adapter (CPU%d)", i - 1);
186 s4985_adapter[i].algo = s4985_algo + i;
187 s4985_adapter[i].dev.parent = nforce2_smbus->dev.parent;
188 }
189 s4985_algo[1].smbus_xfer = nforce2_access_virt1;
190 s4985_algo[2].smbus_xfer = nforce2_access_virt2;
191 s4985_algo[3].smbus_xfer = nforce2_access_virt3;
192 s4985_algo[4].smbus_xfer = nforce2_access_virt4;
193
194 /* Configure the PCA9556 multiplexer */
195 ioconfig.byte = 0x00; /* All I/O to output mode */
196 error = nforce2_smbus->algo->smbus_xfer(nforce2_smbus, 0x18, 0,
197 I2C_SMBUS_WRITE, 0x03,
198 I2C_SMBUS_BYTE_DATA, &ioconfig);
199 if (error) {
200 dev_err(&nforce2_smbus->dev, "PCA9556 configuration failed\n");
201 error = -EIO;
202 goto ERROR3;
203 }
204
205 /* Register virtual adapters */
206 for (i = 0; i < 5; i++) {
207 error = i2c_add_adapter(s4985_adapter + i);
208 if (error) {
209 dev_err(&nforce2_smbus->dev,
210 "Virtual adapter %d registration "
211 "failed, module not inserted\n", i);
212 for (i--; i >= 0; i--)
213 i2c_del_adapter(s4985_adapter + i);
214 goto ERROR3;
215 }
216 }
217
218 return 0;
219
220ERROR3:
221 kfree(s4985_algo);
222 s4985_algo = NULL;
223ERROR2:
224 kfree(s4985_adapter);
225 s4985_adapter = NULL;
226ERROR1:
227 /* Restore physical bus */
228 i2c_add_adapter(nforce2_smbus);
229ERROR0:
230 return error;
231}
232
233static void __exit nforce2_s4985_exit(void)
234{
235 if (s4985_adapter) {
236 int i;
237
238 for (i = 0; i < 5; i++)
239 i2c_del_adapter(s4985_adapter+i);
240 kfree(s4985_adapter);
241 s4985_adapter = NULL;
242 }
243 kfree(s4985_algo);
244 s4985_algo = NULL;
245
246 /* Restore physical bus */
247 if (i2c_add_adapter(nforce2_smbus))
248 dev_err(&nforce2_smbus->dev, "Physical bus restoration "
249 "failed\n");
250}
251
252MODULE_AUTHOR("Jean Delvare <khali@linux-fr.org>");
253MODULE_DESCRIPTION("S4985 SMBus multiplexing");
254MODULE_LICENSE("GPL");
255
256module_init(nforce2_s4985_init);
257module_exit(nforce2_s4985_exit);
diff --git a/drivers/i2c/busses/i2c-nforce2.c b/drivers/i2c/busses/i2c-nforce2.c
index 43c9f8df9509..3b19bc41a60b 100644
--- a/drivers/i2c/busses/i2c-nforce2.c
+++ b/drivers/i2c/busses/i2c-nforce2.c
@@ -51,6 +51,7 @@
51#include <linux/i2c.h> 51#include <linux/i2c.h>
52#include <linux/delay.h> 52#include <linux/delay.h>
53#include <linux/dmi.h> 53#include <linux/dmi.h>
54#include <linux/acpi.h>
54#include <asm/io.h> 55#include <asm/io.h>
55 56
56MODULE_LICENSE("GPL"); 57MODULE_LICENSE("GPL");
@@ -124,6 +125,20 @@ static struct dmi_system_id __devinitdata nforce2_dmi_blacklist2[] = {
124 125
125static struct pci_driver nforce2_driver; 126static struct pci_driver nforce2_driver;
126 127
128/* For multiplexing support, we need a global reference to the 1st
129 SMBus channel */
130#if defined CONFIG_I2C_NFORCE2_S4985 || defined CONFIG_I2C_NFORCE2_S4985_MODULE
131struct i2c_adapter *nforce2_smbus;
132EXPORT_SYMBOL_GPL(nforce2_smbus);
133
134static void nforce2_set_reference(struct i2c_adapter *adap)
135{
136 nforce2_smbus = adap;
137}
138#else
139static inline void nforce2_set_reference(struct i2c_adapter *adap) { }
140#endif
141
127static void nforce2_abort(struct i2c_adapter *adap) 142static void nforce2_abort(struct i2c_adapter *adap)
128{ 143{
129 struct nforce2_smbus *smbus = adap->algo_data; 144 struct nforce2_smbus *smbus = adap->algo_data;
@@ -158,16 +173,16 @@ static int nforce2_check_status(struct i2c_adapter *adap)
158 dev_dbg(&adap->dev, "SMBus Timeout!\n"); 173 dev_dbg(&adap->dev, "SMBus Timeout!\n");
159 if (smbus->can_abort) 174 if (smbus->can_abort)
160 nforce2_abort(adap); 175 nforce2_abort(adap);
161 return -1; 176 return -ETIMEDOUT;
162 } 177 }
163 if (!(temp & NVIDIA_SMB_STS_DONE) || (temp & NVIDIA_SMB_STS_STATUS)) { 178 if (!(temp & NVIDIA_SMB_STS_DONE) || (temp & NVIDIA_SMB_STS_STATUS)) {
164 dev_dbg(&adap->dev, "Transaction failed (0x%02x)!\n", temp); 179 dev_dbg(&adap->dev, "Transaction failed (0x%02x)!\n", temp);
165 return -1; 180 return -EIO;
166 } 181 }
167 return 0; 182 return 0;
168} 183}
169 184
170/* Return -1 on error */ 185/* Return negative errno on error */
171static s32 nforce2_access(struct i2c_adapter * adap, u16 addr, 186static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
172 unsigned short flags, char read_write, 187 unsigned short flags, char read_write,
173 u8 command, int size, union i2c_smbus_data * data) 188 u8 command, int size, union i2c_smbus_data * data)
@@ -175,7 +190,7 @@ static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
175 struct nforce2_smbus *smbus = adap->algo_data; 190 struct nforce2_smbus *smbus = adap->algo_data;
176 unsigned char protocol, pec; 191 unsigned char protocol, pec;
177 u8 len; 192 u8 len;
178 int i; 193 int i, status;
179 194
180 protocol = (read_write == I2C_SMBUS_READ) ? NVIDIA_SMB_PRTCL_READ : 195 protocol = (read_write == I2C_SMBUS_READ) ? NVIDIA_SMB_PRTCL_READ :
181 NVIDIA_SMB_PRTCL_WRITE; 196 NVIDIA_SMB_PRTCL_WRITE;
@@ -219,7 +234,7 @@ static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
219 "Transaction failed " 234 "Transaction failed "
220 "(requested block size: %d)\n", 235 "(requested block size: %d)\n",
221 len); 236 len);
222 return -1; 237 return -EINVAL;
223 } 238 }
224 outb_p(len, NVIDIA_SMB_BCNT); 239 outb_p(len, NVIDIA_SMB_BCNT);
225 for (i = 0; i < I2C_SMBUS_BLOCK_MAX; i++) 240 for (i = 0; i < I2C_SMBUS_BLOCK_MAX; i++)
@@ -231,14 +246,15 @@ static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
231 246
232 default: 247 default:
233 dev_err(&adap->dev, "Unsupported transaction %d\n", size); 248 dev_err(&adap->dev, "Unsupported transaction %d\n", size);
234 return -1; 249 return -EOPNOTSUPP;
235 } 250 }
236 251
237 outb_p((addr & 0x7f) << 1, NVIDIA_SMB_ADDR); 252 outb_p((addr & 0x7f) << 1, NVIDIA_SMB_ADDR);
238 outb_p(protocol, NVIDIA_SMB_PRTCL); 253 outb_p(protocol, NVIDIA_SMB_PRTCL);
239 254
240 if (nforce2_check_status(adap)) 255 status = nforce2_check_status(adap);
241 return -1; 256 if (status)
257 return status;
242 258
243 if (read_write == I2C_SMBUS_WRITE) 259 if (read_write == I2C_SMBUS_WRITE)
244 return 0; 260 return 0;
@@ -260,7 +276,7 @@ static s32 nforce2_access(struct i2c_adapter * adap, u16 addr,
260 dev_err(&adap->dev, "Transaction failed " 276 dev_err(&adap->dev, "Transaction failed "
261 "(received block size: 0x%02x)\n", 277 "(received block size: 0x%02x)\n",
262 len); 278 len);
263 return -1; 279 return -EPROTO;
264 } 280 }
265 for (i = 0; i < len; i++) 281 for (i = 0; i < len; i++)
266 data->block[i+1] = inb_p(NVIDIA_SMB_DATA + i); 282 data->block[i+1] = inb_p(NVIDIA_SMB_DATA + i);
@@ -321,21 +337,26 @@ static int __devinit nforce2_probe_smb (struct pci_dev *dev, int bar,
321 != PCIBIOS_SUCCESSFUL) { 337 != PCIBIOS_SUCCESSFUL) {
322 dev_err(&dev->dev, "Error reading PCI config for %s\n", 338 dev_err(&dev->dev, "Error reading PCI config for %s\n",
323 name); 339 name);
324 return -1; 340 return -EIO;
325 } 341 }
326 342
327 smbus->base = iobase & PCI_BASE_ADDRESS_IO_MASK; 343 smbus->base = iobase & PCI_BASE_ADDRESS_IO_MASK;
328 smbus->size = 64; 344 smbus->size = 64;
329 } 345 }
330 346
347 error = acpi_check_region(smbus->base, smbus->size,
348 nforce2_driver.name);
349 if (error)
350 return -1;
351
331 if (!request_region(smbus->base, smbus->size, nforce2_driver.name)) { 352 if (!request_region(smbus->base, smbus->size, nforce2_driver.name)) {
332 dev_err(&smbus->adapter.dev, "Error requesting region %02x .. %02X for %s\n", 353 dev_err(&smbus->adapter.dev, "Error requesting region %02x .. %02X for %s\n",
333 smbus->base, smbus->base+smbus->size-1, name); 354 smbus->base, smbus->base+smbus->size-1, name);
334 return -1; 355 return -EBUSY;
335 } 356 }
336 smbus->adapter.owner = THIS_MODULE; 357 smbus->adapter.owner = THIS_MODULE;
337 smbus->adapter.id = I2C_HW_SMBUS_NFORCE2; 358 smbus->adapter.id = I2C_HW_SMBUS_NFORCE2;
338 smbus->adapter.class = I2C_CLASS_HWMON; 359 smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
339 smbus->adapter.algo = &smbus_algorithm; 360 smbus->adapter.algo = &smbus_algorithm;
340 smbus->adapter.algo_data = smbus; 361 smbus->adapter.algo_data = smbus;
341 smbus->adapter.dev.parent = &dev->dev; 362 smbus->adapter.dev.parent = &dev->dev;
@@ -346,7 +367,7 @@ static int __devinit nforce2_probe_smb (struct pci_dev *dev, int bar,
346 if (error) { 367 if (error) {
347 dev_err(&smbus->adapter.dev, "Failed to register adapter.\n"); 368 dev_err(&smbus->adapter.dev, "Failed to register adapter.\n");
348 release_region(smbus->base, smbus->size); 369 release_region(smbus->base, smbus->size);
349 return -1; 370 return error;
350 } 371 }
351 dev_info(&smbus->adapter.dev, "nForce2 SMBus adapter at %#x\n", smbus->base); 372 dev_info(&smbus->adapter.dev, "nForce2 SMBus adapter at %#x\n", smbus->base);
352 return 0; 373 return 0;
@@ -398,6 +419,7 @@ static int __devinit nforce2_probe(struct pci_dev *dev, const struct pci_device_
398 return -ENODEV; 419 return -ENODEV;
399 } 420 }
400 421
422 nforce2_set_reference(&smbuses[0].adapter);
401 return 0; 423 return 0;
402} 424}
403 425
@@ -406,6 +428,7 @@ static void __devexit nforce2_remove(struct pci_dev *dev)
406{ 428{
407 struct nforce2_smbus *smbuses = (void*) pci_get_drvdata(dev); 429 struct nforce2_smbus *smbuses = (void*) pci_get_drvdata(dev);
408 430
431 nforce2_set_reference(NULL);
409 if (smbuses[0].base) { 432 if (smbuses[0].base) {
410 i2c_del_adapter(&smbuses[0].adapter); 433 i2c_del_adapter(&smbuses[0].adapter);
411 release_region(smbuses[0].base, smbuses[0].size); 434 release_region(smbuses[0].base, smbuses[0].size);
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index f145692cbb76..e5193bf75483 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -29,6 +29,7 @@ struct ocores_i2c {
29 int pos; 29 int pos;
30 int nmsgs; 30 int nmsgs;
31 int state; /* see STATE_ */ 31 int state; /* see STATE_ */
32 int clock_khz;
32}; 33};
33 34
34/* registers */ 35/* registers */
@@ -173,8 +174,7 @@ static int ocores_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
173 return -ETIMEDOUT; 174 return -ETIMEDOUT;
174} 175}
175 176
176static void ocores_init(struct ocores_i2c *i2c, 177static void ocores_init(struct ocores_i2c *i2c)
177 struct ocores_i2c_platform_data *pdata)
178{ 178{
179 int prescale; 179 int prescale;
180 u8 ctrl = oc_getreg(i2c, OCI2C_CONTROL); 180 u8 ctrl = oc_getreg(i2c, OCI2C_CONTROL);
@@ -182,7 +182,7 @@ static void ocores_init(struct ocores_i2c *i2c,
182 /* make sure the device is disabled */ 182 /* make sure the device is disabled */
183 oc_setreg(i2c, OCI2C_CONTROL, ctrl & ~(OCI2C_CTRL_EN|OCI2C_CTRL_IEN)); 183 oc_setreg(i2c, OCI2C_CONTROL, ctrl & ~(OCI2C_CTRL_EN|OCI2C_CTRL_IEN));
184 184
185 prescale = (pdata->clock_khz / (5*100)) - 1; 185 prescale = (i2c->clock_khz / (5*100)) - 1;
186 oc_setreg(i2c, OCI2C_PRELOW, prescale & 0xff); 186 oc_setreg(i2c, OCI2C_PRELOW, prescale & 0xff);
187 oc_setreg(i2c, OCI2C_PREHIGH, prescale >> 8); 187 oc_setreg(i2c, OCI2C_PREHIGH, prescale >> 8);
188 188
@@ -205,7 +205,7 @@ static const struct i2c_algorithm ocores_algorithm = {
205static struct i2c_adapter ocores_adapter = { 205static struct i2c_adapter ocores_adapter = {
206 .owner = THIS_MODULE, 206 .owner = THIS_MODULE,
207 .name = "i2c-ocores", 207 .name = "i2c-ocores",
208 .class = I2C_CLASS_HWMON, 208 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
209 .algo = &ocores_algorithm, 209 .algo = &ocores_algorithm,
210}; 210};
211 211
@@ -248,7 +248,8 @@ static int __devinit ocores_i2c_probe(struct platform_device *pdev)
248 } 248 }
249 249
250 i2c->regstep = pdata->regstep; 250 i2c->regstep = pdata->regstep;
251 ocores_init(i2c, pdata); 251 i2c->clock_khz = pdata->clock_khz;
252 ocores_init(i2c);
252 253
253 init_waitqueue_head(&i2c->wait); 254 init_waitqueue_head(&i2c->wait);
254 ret = request_irq(res2->start, ocores_isr, 0, pdev->name, i2c); 255 ret = request_irq(res2->start, ocores_isr, 0, pdev->name, i2c);
@@ -312,13 +313,40 @@ static int __devexit ocores_i2c_remove(struct platform_device* pdev)
312 return 0; 313 return 0;
313} 314}
314 315
316#ifdef CONFIG_PM
317static int ocores_i2c_suspend(struct platform_device *pdev, pm_message_t state)
318{
319 struct ocores_i2c *i2c = platform_get_drvdata(pdev);
320 u8 ctrl = oc_getreg(i2c, OCI2C_CONTROL);
321
322 /* make sure the device is disabled */
323 oc_setreg(i2c, OCI2C_CONTROL, ctrl & ~(OCI2C_CTRL_EN|OCI2C_CTRL_IEN));
324
325 return 0;
326}
327
328static int ocores_i2c_resume(struct platform_device *pdev)
329{
330 struct ocores_i2c *i2c = platform_get_drvdata(pdev);
331
332 ocores_init(i2c);
333
334 return 0;
335}
336#else
337#define ocores_i2c_suspend NULL
338#define ocores_i2c_resume NULL
339#endif
340
315/* work with hotplug and coldplug */ 341/* work with hotplug and coldplug */
316MODULE_ALIAS("platform:ocores-i2c"); 342MODULE_ALIAS("platform:ocores-i2c");
317 343
318static struct platform_driver ocores_i2c_driver = { 344static struct platform_driver ocores_i2c_driver = {
319 .probe = ocores_i2c_probe, 345 .probe = ocores_i2c_probe,
320 .remove = __devexit_p(ocores_i2c_remove), 346 .remove = __devexit_p(ocores_i2c_remove),
321 .driver = { 347 .suspend = ocores_i2c_suspend,
348 .resume = ocores_i2c_resume,
349 .driver = {
322 .owner = THIS_MODULE, 350 .owner = THIS_MODULE,
323 .name = "ocores-i2c", 351 .name = "ocores-i2c",
324 }, 352 },
diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c
index 1603c81e39d4..adf0fbb902f0 100644
--- a/drivers/i2c/busses/i2c-pasemi.c
+++ b/drivers/i2c/busses/i2c-pasemi.c
@@ -365,7 +365,7 @@ static int __devinit pasemi_smb_probe(struct pci_dev *dev,
365 smbus->adapter.owner = THIS_MODULE; 365 smbus->adapter.owner = THIS_MODULE;
366 snprintf(smbus->adapter.name, sizeof(smbus->adapter.name), 366 snprintf(smbus->adapter.name, sizeof(smbus->adapter.name),
367 "PA Semi SMBus adapter at 0x%lx", smbus->base); 367 "PA Semi SMBus adapter at 0x%lx", smbus->base);
368 smbus->adapter.class = I2C_CLASS_HWMON; 368 smbus->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
369 smbus->adapter.algo = &smbus_algorithm; 369 smbus->adapter.algo = &smbus_algorithm;
370 smbus->adapter.algo_data = smbus; 370 smbus->adapter.algo_data = smbus;
371 smbus->adapter.nr = PCI_FUNC(dev->devfn); 371 smbus->adapter.nr = PCI_FUNC(dev->devfn);
diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 9d75f51e8f0e..6bb15ad0a6b6 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -163,7 +163,7 @@ static int __devinit i2c_pca_pf_probe(struct platform_device *pdev)
163 163
164 i2c->reg_base = ioremap(res->start, res_len(res)); 164 i2c->reg_base = ioremap(res->start, res_len(res));
165 if (!i2c->reg_base) { 165 if (!i2c->reg_base) {
166 ret = -EIO; 166 ret = -ENOMEM;
167 goto e_remap; 167 goto e_remap;
168 } 168 }
169 i2c->io_base = res->start; 169 i2c->io_base = res->start;
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index ac9165968587..eaa9b387543e 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -1,6 +1,4 @@
1/* 1/*
2 piix4.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (c) 1998 - 2002 Frodo Looijaard <frodol@dds.nl> and 2 Copyright (c) 1998 - 2002 Frodo Looijaard <frodol@dds.nl> and
5 Philip Edelbrock <phil@netroedge.com> 3 Philip Edelbrock <phil@netroedge.com>
6 4
@@ -39,16 +37,10 @@
39#include <linux/i2c.h> 37#include <linux/i2c.h>
40#include <linux/init.h> 38#include <linux/init.h>
41#include <linux/dmi.h> 39#include <linux/dmi.h>
40#include <linux/acpi.h>
42#include <asm/io.h> 41#include <asm/io.h>
43 42
44 43
45struct sd {
46 const unsigned short mfr;
47 const unsigned short dev;
48 const unsigned char fn;
49 const char *name;
50};
51
52/* PIIX4 SMBus address offsets */ 44/* PIIX4 SMBus address offsets */
53#define SMBHSTSTS (0 + piix4_smba) 45#define SMBHSTSTS (0 + piix4_smba)
54#define SMBHSLVSTS (1 + piix4_smba) 46#define SMBHSLVSTS (1 + piix4_smba)
@@ -101,8 +93,6 @@ MODULE_PARM_DESC(force_addr,
101 "Forcibly enable the PIIX4 at the given address. " 93 "Forcibly enable the PIIX4 at the given address. "
102 "EXTREMELY DANGEROUS!"); 94 "EXTREMELY DANGEROUS!");
103 95
104static int piix4_transaction(void);
105
106static unsigned short piix4_smba; 96static unsigned short piix4_smba;
107static int srvrworks_csb5_delay; 97static int srvrworks_csb5_delay;
108static struct pci_driver piix4_driver; 98static struct pci_driver piix4_driver;
@@ -141,8 +131,6 @@ static int __devinit piix4_setup(struct pci_dev *PIIX4_dev,
141{ 131{
142 unsigned char temp; 132 unsigned char temp;
143 133
144 dev_info(&PIIX4_dev->dev, "Found %s device\n", pci_name(PIIX4_dev));
145
146 if ((PIIX4_dev->vendor == PCI_VENDOR_ID_SERVERWORKS) && 134 if ((PIIX4_dev->vendor == PCI_VENDOR_ID_SERVERWORKS) &&
147 (PIIX4_dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5)) 135 (PIIX4_dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5))
148 srvrworks_csb5_delay = 1; 136 srvrworks_csb5_delay = 1;
@@ -172,17 +160,20 @@ static int __devinit piix4_setup(struct pci_dev *PIIX4_dev,
172 pci_read_config_word(PIIX4_dev, SMBBA, &piix4_smba); 160 pci_read_config_word(PIIX4_dev, SMBBA, &piix4_smba);
173 piix4_smba &= 0xfff0; 161 piix4_smba &= 0xfff0;
174 if(piix4_smba == 0) { 162 if(piix4_smba == 0) {
175 dev_err(&PIIX4_dev->dev, "SMB base address " 163 dev_err(&PIIX4_dev->dev, "SMBus base address "
176 "uninitialized - upgrade BIOS or use " 164 "uninitialized - upgrade BIOS or use "
177 "force_addr=0xaddr\n"); 165 "force_addr=0xaddr\n");
178 return -ENODEV; 166 return -ENODEV;
179 } 167 }
180 } 168 }
181 169
170 if (acpi_check_region(piix4_smba, SMBIOSIZE, piix4_driver.name))
171 return -EBUSY;
172
182 if (!request_region(piix4_smba, SMBIOSIZE, piix4_driver.name)) { 173 if (!request_region(piix4_smba, SMBIOSIZE, piix4_driver.name)) {
183 dev_err(&PIIX4_dev->dev, "SMB region 0x%x already in use!\n", 174 dev_err(&PIIX4_dev->dev, "SMBus region 0x%x already in use!\n",
184 piix4_smba); 175 piix4_smba);
185 return -ENODEV; 176 return -EBUSY;
186 } 177 }
187 178
188 pci_read_config_byte(PIIX4_dev, SMBHSTCFG, &temp); 179 pci_read_config_byte(PIIX4_dev, SMBHSTCFG, &temp);
@@ -228,13 +219,13 @@ static int __devinit piix4_setup(struct pci_dev *PIIX4_dev,
228 "(or code out of date)!\n"); 219 "(or code out of date)!\n");
229 220
230 pci_read_config_byte(PIIX4_dev, SMBREV, &temp); 221 pci_read_config_byte(PIIX4_dev, SMBREV, &temp);
231 dev_dbg(&PIIX4_dev->dev, "SMBREV = 0x%X\n", temp); 222 dev_info(&PIIX4_dev->dev,
232 dev_dbg(&PIIX4_dev->dev, "SMBA = 0x%X\n", piix4_smba); 223 "SMBus Host Controller at 0x%x, revision %d\n",
224 piix4_smba, temp);
233 225
234 return 0; 226 return 0;
235} 227}
236 228
237/* Another internally used function */
238static int piix4_transaction(void) 229static int piix4_transaction(void)
239{ 230{
240 int temp; 231 int temp;
@@ -253,7 +244,7 @@ static int piix4_transaction(void)
253 outb_p(temp, SMBHSTSTS); 244 outb_p(temp, SMBHSTSTS);
254 if ((temp = inb_p(SMBHSTSTS)) != 0x00) { 245 if ((temp = inb_p(SMBHSTSTS)) != 0x00) {
255 dev_err(&piix4_adapter.dev, "Failed! (%02x)\n", temp); 246 dev_err(&piix4_adapter.dev, "Failed! (%02x)\n", temp);
256 return -1; 247 return -EBUSY;
257 } else { 248 } else {
258 dev_dbg(&piix4_adapter.dev, "Successful!\n"); 249 dev_dbg(&piix4_adapter.dev, "Successful!\n");
259 } 250 }
@@ -275,23 +266,23 @@ static int piix4_transaction(void)
275 /* If the SMBus is still busy, we give up */ 266 /* If the SMBus is still busy, we give up */
276 if (timeout >= MAX_TIMEOUT) { 267 if (timeout >= MAX_TIMEOUT) {
277 dev_err(&piix4_adapter.dev, "SMBus Timeout!\n"); 268 dev_err(&piix4_adapter.dev, "SMBus Timeout!\n");
278 result = -1; 269 result = -ETIMEDOUT;
279 } 270 }
280 271
281 if (temp & 0x10) { 272 if (temp & 0x10) {
282 result = -1; 273 result = -EIO;
283 dev_err(&piix4_adapter.dev, "Error: Failed bus transaction\n"); 274 dev_err(&piix4_adapter.dev, "Error: Failed bus transaction\n");
284 } 275 }
285 276
286 if (temp & 0x08) { 277 if (temp & 0x08) {
287 result = -1; 278 result = -EIO;
288 dev_dbg(&piix4_adapter.dev, "Bus collision! SMBus may be " 279 dev_dbg(&piix4_adapter.dev, "Bus collision! SMBus may be "
289 "locked until next hard reset. (sorry!)\n"); 280 "locked until next hard reset. (sorry!)\n");
290 /* Clock stops and slave is stuck in mid-transmission */ 281 /* Clock stops and slave is stuck in mid-transmission */
291 } 282 }
292 283
293 if (temp & 0x04) { 284 if (temp & 0x04) {
294 result = -1; 285 result = -ENXIO;
295 dev_dbg(&piix4_adapter.dev, "Error: no response!\n"); 286 dev_dbg(&piix4_adapter.dev, "Error: no response!\n");
296 } 287 }
297 288
@@ -309,31 +300,29 @@ static int piix4_transaction(void)
309 return result; 300 return result;
310} 301}
311 302
312/* Return -1 on error. */ 303/* Return negative errno on error. */
313static s32 piix4_access(struct i2c_adapter * adap, u16 addr, 304static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
314 unsigned short flags, char read_write, 305 unsigned short flags, char read_write,
315 u8 command, int size, union i2c_smbus_data * data) 306 u8 command, int size, union i2c_smbus_data * data)
316{ 307{
317 int i, len; 308 int i, len;
309 int status;
318 310
319 switch (size) { 311 switch (size) {
320 case I2C_SMBUS_PROC_CALL:
321 dev_err(&adap->dev, "I2C_SMBUS_PROC_CALL not supported!\n");
322 return -1;
323 case I2C_SMBUS_QUICK: 312 case I2C_SMBUS_QUICK:
324 outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), 313 outb_p((addr << 1) | read_write,
325 SMBHSTADD); 314 SMBHSTADD);
326 size = PIIX4_QUICK; 315 size = PIIX4_QUICK;
327 break; 316 break;
328 case I2C_SMBUS_BYTE: 317 case I2C_SMBUS_BYTE:
329 outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), 318 outb_p((addr << 1) | read_write,
330 SMBHSTADD); 319 SMBHSTADD);
331 if (read_write == I2C_SMBUS_WRITE) 320 if (read_write == I2C_SMBUS_WRITE)
332 outb_p(command, SMBHSTCMD); 321 outb_p(command, SMBHSTCMD);
333 size = PIIX4_BYTE; 322 size = PIIX4_BYTE;
334 break; 323 break;
335 case I2C_SMBUS_BYTE_DATA: 324 case I2C_SMBUS_BYTE_DATA:
336 outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), 325 outb_p((addr << 1) | read_write,
337 SMBHSTADD); 326 SMBHSTADD);
338 outb_p(command, SMBHSTCMD); 327 outb_p(command, SMBHSTCMD);
339 if (read_write == I2C_SMBUS_WRITE) 328 if (read_write == I2C_SMBUS_WRITE)
@@ -341,7 +330,7 @@ static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
341 size = PIIX4_BYTE_DATA; 330 size = PIIX4_BYTE_DATA;
342 break; 331 break;
343 case I2C_SMBUS_WORD_DATA: 332 case I2C_SMBUS_WORD_DATA:
344 outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), 333 outb_p((addr << 1) | read_write,
345 SMBHSTADD); 334 SMBHSTADD);
346 outb_p(command, SMBHSTCMD); 335 outb_p(command, SMBHSTCMD);
347 if (read_write == I2C_SMBUS_WRITE) { 336 if (read_write == I2C_SMBUS_WRITE) {
@@ -351,15 +340,13 @@ static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
351 size = PIIX4_WORD_DATA; 340 size = PIIX4_WORD_DATA;
352 break; 341 break;
353 case I2C_SMBUS_BLOCK_DATA: 342 case I2C_SMBUS_BLOCK_DATA:
354 outb_p(((addr & 0x7f) << 1) | (read_write & 0x01), 343 outb_p((addr << 1) | read_write,
355 SMBHSTADD); 344 SMBHSTADD);
356 outb_p(command, SMBHSTCMD); 345 outb_p(command, SMBHSTCMD);
357 if (read_write == I2C_SMBUS_WRITE) { 346 if (read_write == I2C_SMBUS_WRITE) {
358 len = data->block[0]; 347 len = data->block[0];
359 if (len < 0) 348 if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
360 len = 0; 349 return -EINVAL;
361 if (len > 32)
362 len = 32;
363 outb_p(len, SMBHSTDAT0); 350 outb_p(len, SMBHSTDAT0);
364 i = inb_p(SMBHSTCNT); /* Reset SMBBLKDAT */ 351 i = inb_p(SMBHSTCNT); /* Reset SMBBLKDAT */
365 for (i = 1; i <= len; i++) 352 for (i = 1; i <= len; i++)
@@ -367,12 +354,16 @@ static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
367 } 354 }
368 size = PIIX4_BLOCK_DATA; 355 size = PIIX4_BLOCK_DATA;
369 break; 356 break;
357 default:
358 dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
359 return -EOPNOTSUPP;
370 } 360 }
371 361
372 outb_p((size & 0x1C) + (ENABLE_INT9 & 1), SMBHSTCNT); 362 outb_p((size & 0x1C) + (ENABLE_INT9 & 1), SMBHSTCNT);
373 363
374 if (piix4_transaction()) /* Error in transaction */ 364 status = piix4_transaction();
375 return -1; 365 if (status)
366 return status;
376 367
377 if ((read_write == I2C_SMBUS_WRITE) || (size == PIIX4_QUICK)) 368 if ((read_write == I2C_SMBUS_WRITE) || (size == PIIX4_QUICK))
378 return 0; 369 return 0;
@@ -388,6 +379,8 @@ static s32 piix4_access(struct i2c_adapter * adap, u16 addr,
388 break; 379 break;
389 case PIIX4_BLOCK_DATA: 380 case PIIX4_BLOCK_DATA:
390 data->block[0] = inb_p(SMBHSTDAT0); 381 data->block[0] = inb_p(SMBHSTDAT0);
382 if (data->block[0] == 0 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
383 return -EPROTO;
391 i = inb_p(SMBHSTCNT); /* Reset SMBBLKDAT */ 384 i = inb_p(SMBHSTCNT); /* Reset SMBBLKDAT */
392 for (i = 1; i <= data->block[0]; i++) 385 for (i = 1; i <= data->block[0]; i++)
393 data->block[i] = inb_p(SMBBLKDAT); 386 data->block[i] = inb_p(SMBBLKDAT);
@@ -411,7 +404,7 @@ static const struct i2c_algorithm smbus_algorithm = {
411static struct i2c_adapter piix4_adapter = { 404static struct i2c_adapter piix4_adapter = {
412 .owner = THIS_MODULE, 405 .owner = THIS_MODULE,
413 .id = I2C_HW_SMBUS_PIIX4, 406 .id = I2C_HW_SMBUS_PIIX4,
414 .class = I2C_CLASS_HWMON, 407 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
415 .algo = &smbus_algorithm, 408 .algo = &smbus_algorithm,
416}; 409};
417 410
diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c
index 63b3e2c11cff..dcf2045b5222 100644
--- a/drivers/i2c/busses/i2c-pmcmsp.c
+++ b/drivers/i2c/busses/i2c-pmcmsp.c
@@ -622,7 +622,7 @@ static struct i2c_algorithm pmcmsptwi_algo = {
622 622
623static struct i2c_adapter pmcmsptwi_adapter = { 623static struct i2c_adapter pmcmsptwi_adapter = {
624 .owner = THIS_MODULE, 624 .owner = THIS_MODULE,
625 .class = I2C_CLASS_HWMON, 625 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
626 .algo = &pmcmsptwi_algo, 626 .algo = &pmcmsptwi_algo,
627 .name = DRV_NAME, 627 .name = DRV_NAME,
628}; 628};
diff --git a/drivers/i2c/busses/i2c-prosavage.c b/drivers/i2c/busses/i2c-prosavage.c
deleted file mode 100644
index 07c1f1e27df1..000000000000
--- a/drivers/i2c/busses/i2c-prosavage.c
+++ /dev/null
@@ -1,325 +0,0 @@
1/*
2 * kernel/busses/i2c-prosavage.c
3 *
4 * i2c bus driver for S3/VIA 8365/8375 graphics processor.
5 * Copyright (c) 2003 Henk Vergonet <henk@god.dyndns.org>
6 * Based on code written by:
7 * Frodo Looijaard <frodol@dds.nl>,
8 * Philip Edelbrock <phil@netroedge.com>,
9 * Ralph Metzler <rjkm@thp.uni-koeln.de>, and
10 * Mark D. Studebaker <mdsxyz123@yahoo.com>
11 * Simon Vogl
12 * and others
13 *
14 * Please read the lm_sensors documentation for details on use.
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
29 *
30 */
31/* 18-05-2003 HVE - created
32 * 14-06-2003 HVE - adapted for lm_sensors2
33 * 17-06-2003 HVE - linux 2.5.xx compatible
34 * 18-06-2003 HVE - codingstyle
35 * 21-06-2003 HVE - compatibility lm_sensors2 and linux 2.5.xx
36 * codingstyle, mmio enabled
37 *
38 * This driver interfaces to the I2C bus of the VIA north bridge embedded
39 * ProSavage4/8 devices. Usefull for gaining access to the TV Encoder chips.
40 *
41 * Graphics cores:
42 * S3/VIA KM266/VT8375 aka ProSavage8
43 * S3/VIA KM133/VT8365 aka Savage4
44 *
45 * Two serial busses are implemented:
46 * SERIAL1 - I2C serial communications interface
47 * SERIAL2 - DDC2 monitor communications interface
48 *
49 * Tested on a FX41 mainboard, see http://www.shuttle.com
50 *
51 *
52 * TODO:
53 * - integration with prosavage framebuffer device
54 * (Additional documentation needed :(
55 */
56
57#include <linux/module.h>
58#include <linux/init.h>
59#include <linux/pci.h>
60#include <linux/i2c.h>
61#include <linux/i2c-algo-bit.h>
62#include <asm/io.h>
63
64/*
65 * driver configuration
66 */
67#define MAX_BUSSES 2
68
69struct s_i2c_bus {
70 void __iomem *mmvga;
71 int i2c_reg;
72 int adap_ok;
73 struct i2c_adapter adap;
74 struct i2c_algo_bit_data algo;
75};
76
77struct s_i2c_chip {
78 void __iomem *mmio;
79 struct s_i2c_bus i2c_bus[MAX_BUSSES];
80};
81
82
83/*
84 * i2c configuration
85 */
86#define CYCLE_DELAY 10
87#define TIMEOUT (HZ / 2)
88
89
90/*
91 * S3/VIA 8365/8375 registers
92 */
93#define VGA_CR_IX 0x3d4
94#define VGA_CR_DATA 0x3d5
95
96#define CR_SERIAL1 0xa0 /* I2C serial communications interface */
97#define MM_SERIAL1 0xff20
98#define CR_SERIAL2 0xb1 /* DDC2 monitor communications interface */
99
100/* based on vt8365 documentation */
101#define I2C_ENAB 0x10
102#define I2C_SCL_OUT 0x01
103#define I2C_SDA_OUT 0x02
104#define I2C_SCL_IN 0x04
105#define I2C_SDA_IN 0x08
106
107#define SET_CR_IX(p, val) writeb((val), (p)->mmvga + VGA_CR_IX)
108#define SET_CR_DATA(p, val) writeb((val), (p)->mmvga + VGA_CR_DATA)
109#define GET_CR_DATA(p) readb((p)->mmvga + VGA_CR_DATA)
110
111
112/*
113 * Serial bus line handling
114 *
115 * serial communications register as parameter in private data
116 *
117 * TODO: locks with other code sections accessing video registers?
118 */
119static void bit_s3via_setscl(void *bus, int val)
120{
121 struct s_i2c_bus *p = (struct s_i2c_bus *)bus;
122 unsigned int r;
123
124 SET_CR_IX(p, p->i2c_reg);
125 r = GET_CR_DATA(p);
126 r |= I2C_ENAB;
127 if (val) {
128 r |= I2C_SCL_OUT;
129 } else {
130 r &= ~I2C_SCL_OUT;
131 }
132 SET_CR_DATA(p, r);
133}
134
135static void bit_s3via_setsda(void *bus, int val)
136{
137 struct s_i2c_bus *p = (struct s_i2c_bus *)bus;
138 unsigned int r;
139
140 SET_CR_IX(p, p->i2c_reg);
141 r = GET_CR_DATA(p);
142 r |= I2C_ENAB;
143 if (val) {
144 r |= I2C_SDA_OUT;
145 } else {
146 r &= ~I2C_SDA_OUT;
147 }
148 SET_CR_DATA(p, r);
149}
150
151static int bit_s3via_getscl(void *bus)
152{
153 struct s_i2c_bus *p = (struct s_i2c_bus *)bus;
154
155 SET_CR_IX(p, p->i2c_reg);
156 return (0 != (GET_CR_DATA(p) & I2C_SCL_IN));
157}
158
159static int bit_s3via_getsda(void *bus)
160{
161 struct s_i2c_bus *p = (struct s_i2c_bus *)bus;
162
163 SET_CR_IX(p, p->i2c_reg);
164 return (0 != (GET_CR_DATA(p) & I2C_SDA_IN));
165}
166
167
168/*
169 * adapter initialisation
170 */
171static int i2c_register_bus(struct pci_dev *dev, struct s_i2c_bus *p, void __iomem *mmvga, u32 i2c_reg)
172{
173 int ret;
174 p->adap.owner = THIS_MODULE;
175 p->adap.id = I2C_HW_B_S3VIA;
176 p->adap.algo_data = &p->algo;
177 p->adap.dev.parent = &dev->dev;
178 p->algo.setsda = bit_s3via_setsda;
179 p->algo.setscl = bit_s3via_setscl;
180 p->algo.getsda = bit_s3via_getsda;
181 p->algo.getscl = bit_s3via_getscl;
182 p->algo.udelay = CYCLE_DELAY;
183 p->algo.timeout = TIMEOUT;
184 p->algo.data = p;
185 p->mmvga = mmvga;
186 p->i2c_reg = i2c_reg;
187
188 ret = i2c_bit_add_bus(&p->adap);
189 if (ret) {
190 return ret;
191 }
192
193 p->adap_ok = 1;
194 return 0;
195}
196
197
198/*
199 * Cleanup stuff
200 */
201static void prosavage_remove(struct pci_dev *dev)
202{
203 struct s_i2c_chip *chip;
204 int i, ret;
205
206 chip = (struct s_i2c_chip *)pci_get_drvdata(dev);
207
208 if (!chip) {
209 return;
210 }
211 for (i = MAX_BUSSES - 1; i >= 0; i--) {
212 if (chip->i2c_bus[i].adap_ok == 0)
213 continue;
214
215 ret = i2c_del_adapter(&chip->i2c_bus[i].adap);
216 if (ret) {
217 dev_err(&dev->dev, "%s not removed\n",
218 chip->i2c_bus[i].adap.name);
219 }
220 }
221 if (chip->mmio) {
222 iounmap(chip->mmio);
223 }
224 kfree(chip);
225}
226
227
228/*
229 * Detect chip and initialize it
230 */
231static int __devinit prosavage_probe(struct pci_dev *dev, const struct pci_device_id *id)
232{
233 int ret;
234 unsigned long base, len;
235 struct s_i2c_chip *chip;
236 struct s_i2c_bus *bus;
237
238 pci_set_drvdata(dev, kzalloc(sizeof(struct s_i2c_chip), GFP_KERNEL));
239 chip = (struct s_i2c_chip *)pci_get_drvdata(dev);
240 if (chip == NULL) {
241 return -ENOMEM;
242 }
243
244 base = dev->resource[0].start & PCI_BASE_ADDRESS_MEM_MASK;
245 len = dev->resource[0].end - base + 1;
246 chip->mmio = ioremap_nocache(base, len);
247
248 if (chip->mmio == NULL) {
249 dev_err(&dev->dev, "ioremap failed\n");
250 prosavage_remove(dev);
251 return -ENODEV;
252 }
253
254
255 /*
256 * Chip initialisation
257 */
258 /* Unlock Extended IO Space ??? */
259
260
261 /*
262 * i2c bus registration
263 */
264 bus = &chip->i2c_bus[0];
265 snprintf(bus->adap.name, sizeof(bus->adap.name),
266 "ProSavage I2C bus at %02x:%02x.%x",
267 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
268 ret = i2c_register_bus(dev, bus, chip->mmio + 0x8000, CR_SERIAL1);
269 if (ret) {
270 goto err_adap;
271 }
272 /*
273 * ddc bus registration
274 */
275 bus = &chip->i2c_bus[1];
276 snprintf(bus->adap.name, sizeof(bus->adap.name),
277 "ProSavage DDC bus at %02x:%02x.%x",
278 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
279 ret = i2c_register_bus(dev, bus, chip->mmio + 0x8000, CR_SERIAL2);
280 if (ret) {
281 goto err_adap;
282 }
283 return 0;
284err_adap:
285 dev_err(&dev->dev, "%s failed\n", bus->adap.name);
286 prosavage_remove(dev);
287 return ret;
288}
289
290
291/*
292 * Data for PCI driver interface
293 */
294static struct pci_device_id prosavage_pci_tbl[] = {
295 { PCI_DEVICE(PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_SAVAGE4) },
296 { PCI_DEVICE(PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_PROSAVAGE8) },
297 { 0, },
298};
299
300MODULE_DEVICE_TABLE (pci, prosavage_pci_tbl);
301
302static struct pci_driver prosavage_driver = {
303 .name = "prosavage_smbus",
304 .id_table = prosavage_pci_tbl,
305 .probe = prosavage_probe,
306 .remove = prosavage_remove,
307};
308
309static int __init i2c_prosavage_init(void)
310{
311 return pci_register_driver(&prosavage_driver);
312}
313
314static void __exit i2c_prosavage_exit(void)
315{
316 pci_unregister_driver(&prosavage_driver);
317}
318
319MODULE_DEVICE_TABLE(pci, prosavage_pci_tbl);
320MODULE_AUTHOR("Henk Vergonet");
321MODULE_DESCRIPTION("ProSavage VIA 8365/8375 smbus driver");
322MODULE_LICENSE("GPL");
323
324module_init (i2c_prosavage_init);
325module_exit (i2c_prosavage_exit);
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index dde6ce963a19..af9e6034d7fb 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -1104,5 +1104,5 @@ static void __exit i2c_adap_pxa_exit(void)
1104MODULE_LICENSE("GPL"); 1104MODULE_LICENSE("GPL");
1105MODULE_ALIAS("platform:pxa2xx-i2c"); 1105MODULE_ALIAS("platform:pxa2xx-i2c");
1106 1106
1107module_init(i2c_adap_pxa_init); 1107subsys_initcall(i2c_adap_pxa_init);
1108module_exit(i2c_adap_pxa_exit); 1108module_exit(i2c_adap_pxa_exit);
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 9e8c875437be..007390ad9810 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -590,7 +590,7 @@ static struct s3c24xx_i2c s3c24xx_i2c = {
590 .owner = THIS_MODULE, 590 .owner = THIS_MODULE,
591 .algo = &s3c24xx_i2c_algorithm, 591 .algo = &s3c24xx_i2c_algorithm,
592 .retries = 2, 592 .retries = 2,
593 .class = I2C_CLASS_HWMON, 593 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
594 }, 594 },
595}; 595};
596 596
diff --git a/drivers/i2c/busses/i2c-savage4.c b/drivers/i2c/busses/i2c-savage4.c
deleted file mode 100644
index 8adf4abaa035..000000000000
--- a/drivers/i2c/busses/i2c-savage4.c
+++ /dev/null
@@ -1,185 +0,0 @@
1/*
2 i2c-savage4.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (C) 1998-2003 The LM Sensors Team
5 Alexander Wold <awold@bigfoot.com>
6 Mark D. Studebaker <mdsxyz123@yahoo.com>
7
8 Based on i2c-voodoo3.c.
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23*/
24
25/* This interfaces to the I2C bus of the Savage4 to gain access to
26 the BT869 and possibly other I2C devices. The DDC bus is not
27 yet supported because its register is not memory-mapped.
28*/
29
30#include <linux/kernel.h>
31#include <linux/module.h>
32#include <linux/init.h>
33#include <linux/pci.h>
34#include <linux/i2c.h>
35#include <linux/i2c-algo-bit.h>
36#include <asm/io.h>
37
38/* device IDs */
39#define PCI_CHIP_SAVAGE4 0x8A22
40#define PCI_CHIP_SAVAGE2000 0x9102
41
42#define REG 0xff20 /* Serial Port 1 Register */
43
44/* bit locations in the register */
45#define I2C_ENAB 0x00000020
46#define I2C_SCL_OUT 0x00000001
47#define I2C_SDA_OUT 0x00000002
48#define I2C_SCL_IN 0x00000008
49#define I2C_SDA_IN 0x00000010
50
51/* delays */
52#define CYCLE_DELAY 10
53#define TIMEOUT (HZ / 2)
54
55
56static void __iomem *ioaddr;
57
58/* The sav GPIO registers don't have individual masks for each bit
59 so we always have to read before writing. */
60
61static void bit_savi2c_setscl(void *data, int val)
62{
63 unsigned int r;
64 r = readl(ioaddr + REG);
65 if(val)
66 r |= I2C_SCL_OUT;
67 else
68 r &= ~I2C_SCL_OUT;
69 writel(r, ioaddr + REG);
70 readl(ioaddr + REG); /* flush posted write */
71}
72
73static void bit_savi2c_setsda(void *data, int val)
74{
75 unsigned int r;
76 r = readl(ioaddr + REG);
77 if(val)
78 r |= I2C_SDA_OUT;
79 else
80 r &= ~I2C_SDA_OUT;
81 writel(r, ioaddr + REG);
82 readl(ioaddr + REG); /* flush posted write */
83}
84
85/* The GPIO pins are open drain, so the pins always remain outputs.
86 We rely on the i2c-algo-bit routines to set the pins high before
87 reading the input from other chips. */
88
89static int bit_savi2c_getscl(void *data)
90{
91 return (0 != (readl(ioaddr + REG) & I2C_SCL_IN));
92}
93
94static int bit_savi2c_getsda(void *data)
95{
96 return (0 != (readl(ioaddr + REG) & I2C_SDA_IN));
97}
98
99/* Configures the chip */
100
101static int config_s4(struct pci_dev *dev)
102{
103 unsigned long cadr;
104
105 /* map memory */
106 cadr = dev->resource[0].start;
107 cadr &= PCI_BASE_ADDRESS_MEM_MASK;
108 ioaddr = ioremap_nocache(cadr, 0x0080000);
109 if (ioaddr) {
110 /* writel(0x8160, ioaddr + REG2); */
111 writel(0x00000020, ioaddr + REG);
112 dev_info(&dev->dev, "Using Savage4 at %p\n", ioaddr);
113 return 0;
114 }
115 return -ENODEV;
116}
117
118static struct i2c_algo_bit_data sav_i2c_bit_data = {
119 .setsda = bit_savi2c_setsda,
120 .setscl = bit_savi2c_setscl,
121 .getsda = bit_savi2c_getsda,
122 .getscl = bit_savi2c_getscl,
123 .udelay = CYCLE_DELAY,
124 .timeout = TIMEOUT
125};
126
127static struct i2c_adapter savage4_i2c_adapter = {
128 .owner = THIS_MODULE,
129 .id = I2C_HW_B_SAVAGE,
130 .name = "I2C Savage4 adapter",
131 .algo_data = &sav_i2c_bit_data,
132};
133
134static struct pci_device_id savage4_ids[] __devinitdata = {
135 { PCI_DEVICE(PCI_VENDOR_ID_S3, PCI_CHIP_SAVAGE4) },
136 { PCI_DEVICE(PCI_VENDOR_ID_S3, PCI_CHIP_SAVAGE2000) },
137 { 0, }
138};
139
140MODULE_DEVICE_TABLE (pci, savage4_ids);
141
142static int __devinit savage4_probe(struct pci_dev *dev, const struct pci_device_id *id)
143{
144 int retval;
145
146 retval = config_s4(dev);
147 if (retval)
148 return retval;
149
150 /* set up the sysfs linkage to our parent device */
151 savage4_i2c_adapter.dev.parent = &dev->dev;
152
153 return i2c_bit_add_bus(&savage4_i2c_adapter);
154}
155
156static void __devexit savage4_remove(struct pci_dev *dev)
157{
158 i2c_del_adapter(&savage4_i2c_adapter);
159 iounmap(ioaddr);
160}
161
162static struct pci_driver savage4_driver = {
163 .name = "savage4_smbus",
164 .id_table = savage4_ids,
165 .probe = savage4_probe,
166 .remove = __devexit_p(savage4_remove),
167};
168
169static int __init i2c_savage4_init(void)
170{
171 return pci_register_driver(&savage4_driver);
172}
173
174static void __exit i2c_savage4_exit(void)
175{
176 pci_unregister_driver(&savage4_driver);
177}
178
179MODULE_AUTHOR("Alexander Wold <awold@bigfoot.com> "
180 "and Mark D. Studebaker <mdsxyz123@yahoo.com>");
181MODULE_DESCRIPTION("Savage4 I2C/SMBus driver");
182MODULE_LICENSE("GPL");
183
184module_init(i2c_savage4_init);
185module_exit(i2c_savage4_exit);
diff --git a/drivers/i2c/busses/i2c-sibyte.c b/drivers/i2c/busses/i2c-sibyte.c
index 114634da6c6e..4ddefbf238e9 100644
--- a/drivers/i2c/busses/i2c-sibyte.c
+++ b/drivers/i2c/busses/i2c-sibyte.c
@@ -143,7 +143,7 @@ static int __init i2c_sibyte_add_bus(struct i2c_adapter *i2c_adap, int speed)
143 csr_out32(speed, SMB_CSR(adap,R_SMB_FREQ)); 143 csr_out32(speed, SMB_CSR(adap,R_SMB_FREQ));
144 csr_out32(0, SMB_CSR(adap,R_SMB_CONTROL)); 144 csr_out32(0, SMB_CSR(adap,R_SMB_CONTROL));
145 145
146 return i2c_add_adapter(i2c_adap); 146 return i2c_add_numbered_adapter(i2c_adap);
147} 147}
148 148
149 149
@@ -156,17 +156,19 @@ static struct i2c_adapter sibyte_board_adapter[2] = {
156 { 156 {
157 .owner = THIS_MODULE, 157 .owner = THIS_MODULE,
158 .id = I2C_HW_SIBYTE, 158 .id = I2C_HW_SIBYTE,
159 .class = I2C_CLASS_HWMON, 159 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
160 .algo = NULL, 160 .algo = NULL,
161 .algo_data = &sibyte_board_data[0], 161 .algo_data = &sibyte_board_data[0],
162 .nr = 0,
162 .name = "SiByte SMBus 0", 163 .name = "SiByte SMBus 0",
163 }, 164 },
164 { 165 {
165 .owner = THIS_MODULE, 166 .owner = THIS_MODULE,
166 .id = I2C_HW_SIBYTE, 167 .id = I2C_HW_SIBYTE,
167 .class = I2C_CLASS_HWMON, 168 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
168 .algo = NULL, 169 .algo = NULL,
169 .algo_data = &sibyte_board_data[1], 170 .algo_data = &sibyte_board_data[1],
171 .nr = 1,
170 .name = "SiByte SMBus 1", 172 .name = "SiByte SMBus 1",
171 }, 173 },
172}; 174};
diff --git a/drivers/i2c/busses/i2c-sis5595.c b/drivers/i2c/busses/i2c-sis5595.c
index 9ca8f9155f95..dfc2d5eb6a68 100644
--- a/drivers/i2c/busses/i2c-sis5595.c
+++ b/drivers/i2c/busses/i2c-sis5595.c
@@ -1,6 +1,4 @@
1/* 1/*
2 sis5595.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl> and 2 Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl> and
5 Philip Edelbrock <phil@netroedge.com> 3 Philip Edelbrock <phil@netroedge.com>
6 4
@@ -62,6 +60,7 @@
62#include <linux/ioport.h> 60#include <linux/ioport.h>
63#include <linux/init.h> 61#include <linux/init.h>
64#include <linux/i2c.h> 62#include <linux/i2c.h>
63#include <linux/acpi.h>
65#include <asm/io.h> 64#include <asm/io.h>
66 65
67static int blacklist[] = { 66static int blacklist[] = {
@@ -174,6 +173,11 @@ static int sis5595_setup(struct pci_dev *SIS5595_dev)
174 173
175 /* NB: We grab just the two SMBus registers here, but this may still 174 /* NB: We grab just the two SMBus registers here, but this may still
176 * interfere with ACPI :-( */ 175 * interfere with ACPI :-( */
176 retval = acpi_check_region(sis5595_base + SMB_INDEX, 2,
177 sis5595_driver.name);
178 if (retval)
179 return retval;
180
177 if (!request_region(sis5595_base + SMB_INDEX, 2, 181 if (!request_region(sis5595_base + SMB_INDEX, 2,
178 sis5595_driver.name)) { 182 sis5595_driver.name)) {
179 dev_err(&SIS5595_dev->dev, "SMBus registers 0x%04x-0x%04x already in use!\n", 183 dev_err(&SIS5595_dev->dev, "SMBus registers 0x%04x-0x%04x already in use!\n",
@@ -236,7 +240,7 @@ static int sis5595_transaction(struct i2c_adapter *adap)
236 sis5595_write(SMB_STS_HI, temp >> 8); 240 sis5595_write(SMB_STS_HI, temp >> 8);
237 if ((temp = sis5595_read(SMB_STS_LO) + (sis5595_read(SMB_STS_HI) << 8)) != 0x00) { 241 if ((temp = sis5595_read(SMB_STS_LO) + (sis5595_read(SMB_STS_HI) << 8)) != 0x00) {
238 dev_dbg(&adap->dev, "Failed! (%02x)\n", temp); 242 dev_dbg(&adap->dev, "Failed! (%02x)\n", temp);
239 return -1; 243 return -EBUSY;
240 } else { 244 } else {
241 dev_dbg(&adap->dev, "Successful!\n"); 245 dev_dbg(&adap->dev, "Successful!\n");
242 } 246 }
@@ -254,19 +258,19 @@ static int sis5595_transaction(struct i2c_adapter *adap)
254 /* If the SMBus is still busy, we give up */ 258 /* If the SMBus is still busy, we give up */
255 if (timeout >= MAX_TIMEOUT) { 259 if (timeout >= MAX_TIMEOUT) {
256 dev_dbg(&adap->dev, "SMBus Timeout!\n"); 260 dev_dbg(&adap->dev, "SMBus Timeout!\n");
257 result = -1; 261 result = -ETIMEDOUT;
258 } 262 }
259 263
260 if (temp & 0x10) { 264 if (temp & 0x10) {
261 dev_dbg(&adap->dev, "Error: Failed bus transaction\n"); 265 dev_dbg(&adap->dev, "Error: Failed bus transaction\n");
262 result = -1; 266 result = -ENXIO;
263 } 267 }
264 268
265 if (temp & 0x20) { 269 if (temp & 0x20) {
266 dev_err(&adap->dev, "Bus collision! SMBus may be locked until " 270 dev_err(&adap->dev, "Bus collision! SMBus may be locked until "
267 "next hard reset (or not...)\n"); 271 "next hard reset (or not...)\n");
268 /* Clock stops and slave is stuck in mid-transmission */ 272 /* Clock stops and slave is stuck in mid-transmission */
269 result = -1; 273 result = -EIO;
270 } 274 }
271 275
272 temp = sis5595_read(SMB_STS_LO) + (sis5595_read(SMB_STS_HI) << 8); 276 temp = sis5595_read(SMB_STS_LO) + (sis5595_read(SMB_STS_HI) << 8);
@@ -282,11 +286,13 @@ static int sis5595_transaction(struct i2c_adapter *adap)
282 return result; 286 return result;
283} 287}
284 288
285/* Return -1 on error. */ 289/* Return negative errno on error. */
286static s32 sis5595_access(struct i2c_adapter *adap, u16 addr, 290static s32 sis5595_access(struct i2c_adapter *adap, u16 addr,
287 unsigned short flags, char read_write, 291 unsigned short flags, char read_write,
288 u8 command, int size, union i2c_smbus_data *data) 292 u8 command, int size, union i2c_smbus_data *data)
289{ 293{
294 int status;
295
290 switch (size) { 296 switch (size) {
291 case I2C_SMBUS_QUICK: 297 case I2C_SMBUS_QUICK:
292 sis5595_write(SMB_ADDR, ((addr & 0x7f) << 1) | (read_write & 0x01)); 298 sis5595_write(SMB_ADDR, ((addr & 0x7f) << 1) | (read_write & 0x01));
@@ -318,13 +324,14 @@ static s32 sis5595_access(struct i2c_adapter *adap, u16 addr,
318 break; 324 break;
319 default: 325 default:
320 dev_warn(&adap->dev, "Unsupported transaction %d\n", size); 326 dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
321 return -1; 327 return -EOPNOTSUPP;
322 } 328 }
323 329
324 sis5595_write(SMB_CTL_LO, ((size & 0x0E))); 330 sis5595_write(SMB_CTL_LO, ((size & 0x0E)));
325 331
326 if (sis5595_transaction(adap)) 332 status = sis5595_transaction(adap);
327 return -1; 333 if (status)
334 return status;
328 335
329 if ((size != SIS5595_PROC_CALL) && 336 if ((size != SIS5595_PROC_CALL) &&
330 ((read_write == I2C_SMBUS_WRITE) || (size == SIS5595_QUICK))) 337 ((read_write == I2C_SMBUS_WRITE) || (size == SIS5595_QUICK)))
@@ -359,7 +366,7 @@ static const struct i2c_algorithm smbus_algorithm = {
359static struct i2c_adapter sis5595_adapter = { 366static struct i2c_adapter sis5595_adapter = {
360 .owner = THIS_MODULE, 367 .owner = THIS_MODULE,
361 .id = I2C_HW_SMBUS_SIS5595, 368 .id = I2C_HW_SMBUS_SIS5595,
362 .class = I2C_CLASS_HWMON, 369 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
363 .algo = &smbus_algorithm, 370 .algo = &smbus_algorithm,
364}; 371};
365 372
diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c
index 3765dd7f450f..e7c4b790da54 100644
--- a/drivers/i2c/busses/i2c-sis630.c
+++ b/drivers/i2c/busses/i2c-sis630.c
@@ -1,7 +1,4 @@
1/* 1/*
2 i2c-sis630.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4
5 Copyright (c) 2002,2003 Alexander Malysh <amalysh@web.de> 2 Copyright (c) 2002,2003 Alexander Malysh <amalysh@web.de>
6 3
7 This program is free software; you can redistribute it and/or modify 4 This program is free software; you can redistribute it and/or modify
@@ -55,6 +52,7 @@
55#include <linux/ioport.h> 52#include <linux/ioport.h>
56#include <linux/init.h> 53#include <linux/init.h>
57#include <linux/i2c.h> 54#include <linux/i2c.h>
55#include <linux/acpi.h>
58#include <asm/io.h> 56#include <asm/io.h>
59 57
60/* SIS630 SMBus registers */ 58/* SIS630 SMBus registers */
@@ -134,7 +132,7 @@ static int sis630_transaction_start(struct i2c_adapter *adap, int size, u8 *oldc
134 132
135 if ((temp = sis630_read(SMB_CNT) & 0x03) != 0x00) { 133 if ((temp = sis630_read(SMB_CNT) & 0x03) != 0x00) {
136 dev_dbg(&adap->dev, "Failed! (%02x)\n", temp); 134 dev_dbg(&adap->dev, "Failed! (%02x)\n", temp);
137 return -1; 135 return -EBUSY;
138 } else { 136 } else {
139 dev_dbg(&adap->dev, "Successful!\n"); 137 dev_dbg(&adap->dev, "Successful!\n");
140 } 138 }
@@ -177,17 +175,17 @@ static int sis630_transaction_wait(struct i2c_adapter *adap, int size)
177 /* If the SMBus is still busy, we give up */ 175 /* If the SMBus is still busy, we give up */
178 if (timeout >= MAX_TIMEOUT) { 176 if (timeout >= MAX_TIMEOUT) {
179 dev_dbg(&adap->dev, "SMBus Timeout!\n"); 177 dev_dbg(&adap->dev, "SMBus Timeout!\n");
180 result = -1; 178 result = -ETIMEDOUT;
181 } 179 }
182 180
183 if (temp & 0x02) { 181 if (temp & 0x02) {
184 dev_dbg(&adap->dev, "Error: Failed bus transaction\n"); 182 dev_dbg(&adap->dev, "Error: Failed bus transaction\n");
185 result = -1; 183 result = -ENXIO;
186 } 184 }
187 185
188 if (temp & 0x04) { 186 if (temp & 0x04) {
189 dev_err(&adap->dev, "Bus collision!\n"); 187 dev_err(&adap->dev, "Bus collision!\n");
190 result = -1; 188 result = -EIO;
191 /* 189 /*
192 TBD: Datasheet say: 190 TBD: Datasheet say:
193 the software should clear this bit and restart SMBUS operation. 191 the software should clear this bit and restart SMBUS operation.
@@ -250,8 +248,10 @@ static int sis630_block_data(struct i2c_adapter *adap, union i2c_smbus_data *dat
250 if (i==8 || (len<8 && i==len)) { 248 if (i==8 || (len<8 && i==len)) {
251 dev_dbg(&adap->dev, "start trans len=%d i=%d\n",len ,i); 249 dev_dbg(&adap->dev, "start trans len=%d i=%d\n",len ,i);
252 /* first transaction */ 250 /* first transaction */
253 if (sis630_transaction_start(adap, SIS630_BLOCK_DATA, &oldclock)) 251 rc = sis630_transaction_start(adap,
254 return -1; 252 SIS630_BLOCK_DATA, &oldclock);
253 if (rc)
254 return rc;
255 } 255 }
256 else if ((i-1)%8 == 7 || i==len) { 256 else if ((i-1)%8 == 7 || i==len) {
257 dev_dbg(&adap->dev, "trans_wait len=%d i=%d\n",len,i); 257 dev_dbg(&adap->dev, "trans_wait len=%d i=%d\n",len,i);
@@ -264,9 +264,10 @@ static int sis630_block_data(struct i2c_adapter *adap, union i2c_smbus_data *dat
264 */ 264 */
265 sis630_write(SMB_STS,0x10); 265 sis630_write(SMB_STS,0x10);
266 } 266 }
267 if (sis630_transaction_wait(adap, SIS630_BLOCK_DATA)) { 267 rc = sis630_transaction_wait(adap,
268 SIS630_BLOCK_DATA);
269 if (rc) {
268 dev_dbg(&adap->dev, "trans_wait failed\n"); 270 dev_dbg(&adap->dev, "trans_wait failed\n");
269 rc = -1;
270 break; 271 break;
271 } 272 }
272 } 273 }
@@ -275,13 +276,14 @@ static int sis630_block_data(struct i2c_adapter *adap, union i2c_smbus_data *dat
275 else { 276 else {
276 /* read request */ 277 /* read request */
277 data->block[0] = len = 0; 278 data->block[0] = len = 0;
278 if (sis630_transaction_start(adap, SIS630_BLOCK_DATA, &oldclock)) { 279 rc = sis630_transaction_start(adap,
279 return -1; 280 SIS630_BLOCK_DATA, &oldclock);
280 } 281 if (rc)
282 return rc;
281 do { 283 do {
282 if (sis630_transaction_wait(adap, SIS630_BLOCK_DATA)) { 284 rc = sis630_transaction_wait(adap, SIS630_BLOCK_DATA);
285 if (rc) {
283 dev_dbg(&adap->dev, "trans_wait failed\n"); 286 dev_dbg(&adap->dev, "trans_wait failed\n");
284 rc = -1;
285 break; 287 break;
286 } 288 }
287 /* if this first transaction then read byte count */ 289 /* if this first transaction then read byte count */
@@ -311,11 +313,13 @@ static int sis630_block_data(struct i2c_adapter *adap, union i2c_smbus_data *dat
311 return rc; 313 return rc;
312} 314}
313 315
314/* Return -1 on error. */ 316/* Return negative errno on error. */
315static s32 sis630_access(struct i2c_adapter *adap, u16 addr, 317static s32 sis630_access(struct i2c_adapter *adap, u16 addr,
316 unsigned short flags, char read_write, 318 unsigned short flags, char read_write,
317 u8 command, int size, union i2c_smbus_data *data) 319 u8 command, int size, union i2c_smbus_data *data)
318{ 320{
321 int status;
322
319 switch (size) { 323 switch (size) {
320 case I2C_SMBUS_QUICK: 324 case I2C_SMBUS_QUICK:
321 sis630_write(SMB_ADDR, ((addr & 0x7f) << 1) | (read_write & 0x01)); 325 sis630_write(SMB_ADDR, ((addr & 0x7f) << 1) | (read_write & 0x01));
@@ -350,13 +354,14 @@ static s32 sis630_access(struct i2c_adapter *adap, u16 addr,
350 size = SIS630_BLOCK_DATA; 354 size = SIS630_BLOCK_DATA;
351 return sis630_block_data(adap, data, read_write); 355 return sis630_block_data(adap, data, read_write);
352 default: 356 default:
353 printk("Unsupported I2C size\n"); 357 dev_warn(&adap->dev, "Unsupported transaction %d\n",
354 return -1; 358 size);
355 break; 359 return -EOPNOTSUPP;
356 } 360 }
357 361
358 if (sis630_transaction(adap, size)) 362 status = sis630_transaction(adap, size);
359 return -1; 363 if (status)
364 return status;
360 365
361 if ((size != SIS630_PCALL) && 366 if ((size != SIS630_PCALL) &&
362 ((read_write == I2C_SMBUS_WRITE) || (size == SIS630_QUICK))) { 367 ((read_write == I2C_SMBUS_WRITE) || (size == SIS630_QUICK))) {
@@ -372,9 +377,6 @@ static s32 sis630_access(struct i2c_adapter *adap, u16 addr,
372 case SIS630_WORD_DATA: 377 case SIS630_WORD_DATA:
373 data->word = sis630_read(SMB_BYTE) + (sis630_read(SMB_BYTE + 1) << 8); 378 data->word = sis630_read(SMB_BYTE) + (sis630_read(SMB_BYTE + 1) << 8);
374 break; 379 break;
375 default:
376 return -1;
377 break;
378 } 380 }
379 381
380 return 0; 382 return 0;
@@ -433,6 +435,11 @@ static int sis630_setup(struct pci_dev *sis630_dev)
433 435
434 dev_dbg(&sis630_dev->dev, "ACPI base at 0x%04x\n", acpi_base); 436 dev_dbg(&sis630_dev->dev, "ACPI base at 0x%04x\n", acpi_base);
435 437
438 retval = acpi_check_region(acpi_base + SMB_STS, SIS630_SMB_IOREGION,
439 sis630_driver.name);
440 if (retval)
441 goto exit;
442
436 /* Everything is happy, let's grab the memory and set things up. */ 443 /* Everything is happy, let's grab the memory and set things up. */
437 if (!request_region(acpi_base + SMB_STS, SIS630_SMB_IOREGION, 444 if (!request_region(acpi_base + SMB_STS, SIS630_SMB_IOREGION,
438 sis630_driver.name)) { 445 sis630_driver.name)) {
@@ -458,7 +465,7 @@ static const struct i2c_algorithm smbus_algorithm = {
458static struct i2c_adapter sis630_adapter = { 465static struct i2c_adapter sis630_adapter = {
459 .owner = THIS_MODULE, 466 .owner = THIS_MODULE,
460 .id = I2C_HW_SMBUS_SIS630, 467 .id = I2C_HW_SMBUS_SIS630,
461 .class = I2C_CLASS_HWMON, 468 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
462 .algo = &smbus_algorithm, 469 .algo = &smbus_algorithm,
463}; 470};
464 471
diff --git a/drivers/i2c/busses/i2c-sis96x.c b/drivers/i2c/busses/i2c-sis96x.c
index dc235bb8e24d..f1bba6396641 100644
--- a/drivers/i2c/busses/i2c-sis96x.c
+++ b/drivers/i2c/busses/i2c-sis96x.c
@@ -1,7 +1,4 @@
1/* 1/*
2 sis96x.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4
5 Copyright (c) 2003 Mark M. Hoffman <mhoffman@lightlink.com> 2 Copyright (c) 2003 Mark M. Hoffman <mhoffman@lightlink.com>
6 3
7 This program is free software; you can redistribute it and/or modify 4 This program is free software; you can redistribute it and/or modify
@@ -40,6 +37,7 @@
40#include <linux/ioport.h> 37#include <linux/ioport.h>
41#include <linux/i2c.h> 38#include <linux/i2c.h>
42#include <linux/init.h> 39#include <linux/init.h>
40#include <linux/acpi.h>
43#include <asm/io.h> 41#include <asm/io.h>
44 42
45/* base address register in PCI config space */ 43/* base address register in PCI config space */
@@ -111,7 +109,7 @@ static int sis96x_transaction(int size)
111 /* check it again */ 109 /* check it again */
112 if (((temp = sis96x_read(SMB_CNT)) & 0x03) != 0x00) { 110 if (((temp = sis96x_read(SMB_CNT)) & 0x03) != 0x00) {
113 dev_dbg(&sis96x_adapter.dev, "Failed (0x%02x)\n", temp); 111 dev_dbg(&sis96x_adapter.dev, "Failed (0x%02x)\n", temp);
114 return -1; 112 return -EBUSY;
115 } else { 113 } else {
116 dev_dbg(&sis96x_adapter.dev, "Successful\n"); 114 dev_dbg(&sis96x_adapter.dev, "Successful\n");
117 } 115 }
@@ -136,19 +134,19 @@ static int sis96x_transaction(int size)
136 /* If the SMBus is still busy, we give up */ 134 /* If the SMBus is still busy, we give up */
137 if (timeout >= MAX_TIMEOUT) { 135 if (timeout >= MAX_TIMEOUT) {
138 dev_dbg(&sis96x_adapter.dev, "SMBus Timeout! (0x%02x)\n", temp); 136 dev_dbg(&sis96x_adapter.dev, "SMBus Timeout! (0x%02x)\n", temp);
139 result = -1; 137 result = -ETIMEDOUT;
140 } 138 }
141 139
142 /* device error - probably missing ACK */ 140 /* device error - probably missing ACK */
143 if (temp & 0x02) { 141 if (temp & 0x02) {
144 dev_dbg(&sis96x_adapter.dev, "Failed bus transaction!\n"); 142 dev_dbg(&sis96x_adapter.dev, "Failed bus transaction!\n");
145 result = -1; 143 result = -ENXIO;
146 } 144 }
147 145
148 /* bus collision */ 146 /* bus collision */
149 if (temp & 0x04) { 147 if (temp & 0x04) {
150 dev_dbg(&sis96x_adapter.dev, "Bus collision!\n"); 148 dev_dbg(&sis96x_adapter.dev, "Bus collision!\n");
151 result = -1; 149 result = -EIO;
152 } 150 }
153 151
154 /* Finish up by resetting the bus */ 152 /* Finish up by resetting the bus */
@@ -161,11 +159,12 @@ static int sis96x_transaction(int size)
161 return result; 159 return result;
162} 160}
163 161
164/* Return -1 on error. */ 162/* Return negative errno on error. */
165static s32 sis96x_access(struct i2c_adapter * adap, u16 addr, 163static s32 sis96x_access(struct i2c_adapter * adap, u16 addr,
166 unsigned short flags, char read_write, 164 unsigned short flags, char read_write,
167 u8 command, int size, union i2c_smbus_data * data) 165 u8 command, int size, union i2c_smbus_data * data)
168{ 166{
167 int status;
169 168
170 switch (size) { 169 switch (size) {
171 case I2C_SMBUS_QUICK: 170 case I2C_SMBUS_QUICK:
@@ -200,20 +199,14 @@ static s32 sis96x_access(struct i2c_adapter * adap, u16 addr,
200 SIS96x_PROC_CALL : SIS96x_WORD_DATA); 199 SIS96x_PROC_CALL : SIS96x_WORD_DATA);
201 break; 200 break;
202 201
203 case I2C_SMBUS_BLOCK_DATA:
204 /* TO DO: */
205 dev_info(&adap->dev, "SMBus block not implemented!\n");
206 return -1;
207 break;
208
209 default: 202 default:
210 dev_info(&adap->dev, "Unsupported I2C size\n"); 203 dev_warn(&adap->dev, "Unsupported transaction %d\n", size);
211 return -1; 204 return -EOPNOTSUPP;
212 break;
213 } 205 }
214 206
215 if (sis96x_transaction(size)) 207 status = sis96x_transaction(size);
216 return -1; 208 if (status)
209 return status;
217 210
218 if ((size != SIS96x_PROC_CALL) && 211 if ((size != SIS96x_PROC_CALL) &&
219 ((read_write == I2C_SMBUS_WRITE) || (size == SIS96x_QUICK))) 212 ((read_write == I2C_SMBUS_WRITE) || (size == SIS96x_QUICK)))
@@ -249,7 +242,7 @@ static const struct i2c_algorithm smbus_algorithm = {
249static struct i2c_adapter sis96x_adapter = { 242static struct i2c_adapter sis96x_adapter = {
250 .owner = THIS_MODULE, 243 .owner = THIS_MODULE,
251 .id = I2C_HW_SMBUS_SIS96X, 244 .id = I2C_HW_SMBUS_SIS96X,
252 .class = I2C_CLASS_HWMON, 245 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
253 .algo = &smbus_algorithm, 246 .algo = &smbus_algorithm,
254}; 247};
255 248
@@ -286,6 +279,10 @@ static int __devinit sis96x_probe(struct pci_dev *dev,
286 dev_info(&dev->dev, "SiS96x SMBus base address: 0x%04x\n", 279 dev_info(&dev->dev, "SiS96x SMBus base address: 0x%04x\n",
287 sis96x_smbus_base); 280 sis96x_smbus_base);
288 281
282 retval = acpi_check_resource_conflict(&dev->resource[SIS96x_BAR]);
283 if (retval)
284 return retval;
285
289 /* Everything is happy, let's grab the memory and set things up. */ 286 /* Everything is happy, let's grab the memory and set things up. */
290 if (!request_region(sis96x_smbus_base, SMB_IOSIZE, 287 if (!request_region(sis96x_smbus_base, SMB_IOSIZE,
291 sis96x_driver.name)) { 288 sis96x_driver.name)) {
diff --git a/drivers/i2c/busses/i2c-stub.c b/drivers/i2c/busses/i2c-stub.c
index d08eeec53913..1b7b2af94036 100644
--- a/drivers/i2c/busses/i2c-stub.c
+++ b/drivers/i2c/busses/i2c-stub.c
@@ -43,7 +43,7 @@ struct stub_chip {
43 43
44static struct stub_chip *stub_chips; 44static struct stub_chip *stub_chips;
45 45
46/* Return -1 on error. */ 46/* Return negative errno on error. */
47static s32 stub_xfer(struct i2c_adapter * adap, u16 addr, unsigned short flags, 47static s32 stub_xfer(struct i2c_adapter * adap, u16 addr, unsigned short flags,
48 char read_write, u8 command, int size, union i2c_smbus_data * data) 48 char read_write, u8 command, int size, union i2c_smbus_data * data)
49{ 49{
@@ -120,7 +120,7 @@ static s32 stub_xfer(struct i2c_adapter * adap, u16 addr, unsigned short flags,
120 120
121 default: 121 default:
122 dev_dbg(&adap->dev, "Unsupported I2C/SMBus command\n"); 122 dev_dbg(&adap->dev, "Unsupported I2C/SMBus command\n");
123 ret = -1; 123 ret = -EOPNOTSUPP;
124 break; 124 break;
125 } /* switch (size) */ 125 } /* switch (size) */
126 126
@@ -140,7 +140,7 @@ static const struct i2c_algorithm smbus_algorithm = {
140 140
141static struct i2c_adapter stub_adapter = { 141static struct i2c_adapter stub_adapter = {
142 .owner = THIS_MODULE, 142 .owner = THIS_MODULE,
143 .class = I2C_CLASS_HWMON, 143 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
144 .algo = &smbus_algorithm, 144 .algo = &smbus_algorithm,
145 .name = "SMBus stub driver", 145 .name = "SMBus stub driver",
146}; 146};
diff --git a/drivers/i2c/busses/i2c-taos-evm.c b/drivers/i2c/busses/i2c-taos-evm.c
index de9db49e54d9..224aa12ee7c8 100644
--- a/drivers/i2c/busses/i2c-taos-evm.c
+++ b/drivers/i2c/busses/i2c-taos-evm.c
@@ -96,9 +96,8 @@ static int taos_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
96 sprintf(p, "$%02X", command); 96 sprintf(p, "$%02X", command);
97 break; 97 break;
98 default: 98 default:
99 dev_dbg(&adapter->dev, "Unsupported transaction size %d\n", 99 dev_warn(&adapter->dev, "Unsupported transaction %d\n", size);
100 size); 100 return -EOPNOTSUPP;
101 return -EINVAL;
102 } 101 }
103 102
104 /* Send the transaction to the TAOS EVM */ 103 /* Send the transaction to the TAOS EVM */
diff --git a/drivers/i2c/busses/i2c-via.c b/drivers/i2c/busses/i2c-via.c
index 61716f6b14dc..29cef0433f34 100644
--- a/drivers/i2c/busses/i2c-via.c
+++ b/drivers/i2c/busses/i2c-via.c
@@ -1,7 +1,4 @@
1/* 1/*
2 i2c-via.c - Part of lm_sensors, Linux kernel modules
3 for hardware monitoring
4
5 i2c Support for Via Technologies 82C586B South Bridge 2 i2c Support for Via Technologies 82C586B South Bridge
6 3
7 Copyright (c) 1998, 1999 Kyösti Mälkki <kmalkki@cc.hut.fi> 4 Copyright (c) 1998, 1999 Kyösti Mälkki <kmalkki@cc.hut.fi>
@@ -87,7 +84,7 @@ static struct i2c_algo_bit_data bit_data = {
87static struct i2c_adapter vt586b_adapter = { 84static struct i2c_adapter vt586b_adapter = {
88 .owner = THIS_MODULE, 85 .owner = THIS_MODULE,
89 .id = I2C_HW_B_VIA, 86 .id = I2C_HW_B_VIA,
90 .class = I2C_CLASS_HWMON, 87 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
91 .name = "VIA i2c", 88 .name = "VIA i2c",
92 .algo_data = &bit_data, 89 .algo_data = &bit_data,
93}; 90};
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 77b13d027f86..862eb352a2d9 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -1,6 +1,4 @@
1/* 1/*
2 i2c-viapro.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (c) 1998 - 2002 Frodo Looijaard <frodol@dds.nl>, 2 Copyright (c) 1998 - 2002 Frodo Looijaard <frodol@dds.nl>,
5 Philip Edelbrock <phil@netroedge.com>, Kyösti Mälkki <kmalkki@cc.hut.fi>, 3 Philip Edelbrock <phil@netroedge.com>, Kyösti Mälkki <kmalkki@cc.hut.fi>,
6 Mark D. Studebaker <mdsxyz123@yahoo.com> 4 Mark D. Studebaker <mdsxyz123@yahoo.com>
@@ -50,6 +48,7 @@
50#include <linux/ioport.h> 48#include <linux/ioport.h>
51#include <linux/i2c.h> 49#include <linux/i2c.h>
52#include <linux/init.h> 50#include <linux/init.h>
51#include <linux/acpi.h>
53#include <asm/io.h> 52#include <asm/io.h>
54 53
55static struct pci_dev *vt596_pdev; 54static struct pci_dev *vt596_pdev;
@@ -152,7 +151,7 @@ static int vt596_transaction(u8 size)
152 if ((temp = inb_p(SMBHSTSTS)) & 0x1F) { 151 if ((temp = inb_p(SMBHSTSTS)) & 0x1F) {
153 dev_err(&vt596_adapter.dev, "SMBus reset failed! " 152 dev_err(&vt596_adapter.dev, "SMBus reset failed! "
154 "(0x%02x)\n", temp); 153 "(0x%02x)\n", temp);
155 return -1; 154 return -EBUSY;
156 } 155 }
157 } 156 }
158 157
@@ -167,24 +166,24 @@ static int vt596_transaction(u8 size)
167 166
168 /* If the SMBus is still busy, we give up */ 167 /* If the SMBus is still busy, we give up */
169 if (timeout >= MAX_TIMEOUT) { 168 if (timeout >= MAX_TIMEOUT) {
170 result = -1; 169 result = -ETIMEDOUT;
171 dev_err(&vt596_adapter.dev, "SMBus timeout!\n"); 170 dev_err(&vt596_adapter.dev, "SMBus timeout!\n");
172 } 171 }
173 172
174 if (temp & 0x10) { 173 if (temp & 0x10) {
175 result = -1; 174 result = -EIO;
176 dev_err(&vt596_adapter.dev, "Transaction failed (0x%02x)\n", 175 dev_err(&vt596_adapter.dev, "Transaction failed (0x%02x)\n",
177 size); 176 size);
178 } 177 }
179 178
180 if (temp & 0x08) { 179 if (temp & 0x08) {
181 result = -1; 180 result = -EIO;
182 dev_err(&vt596_adapter.dev, "SMBus collision!\n"); 181 dev_err(&vt596_adapter.dev, "SMBus collision!\n");
183 } 182 }
184 183
185 if (temp & 0x04) { 184 if (temp & 0x04) {
186 int read = inb_p(SMBHSTADD) & 0x01; 185 int read = inb_p(SMBHSTADD) & 0x01;
187 result = -1; 186 result = -ENXIO;
188 /* The quick and receive byte commands are used to probe 187 /* The quick and receive byte commands are used to probe
189 for chips, so errors are expected, and we don't want 188 for chips, so errors are expected, and we don't want
190 to frighten the user. */ 189 to frighten the user. */
@@ -202,12 +201,13 @@ static int vt596_transaction(u8 size)
202 return result; 201 return result;
203} 202}
204 203
205/* Return -1 on error, 0 on success */ 204/* Return negative errno on error, 0 on success */
206static s32 vt596_access(struct i2c_adapter *adap, u16 addr, 205static s32 vt596_access(struct i2c_adapter *adap, u16 addr,
207 unsigned short flags, char read_write, u8 command, 206 unsigned short flags, char read_write, u8 command,
208 int size, union i2c_smbus_data *data) 207 int size, union i2c_smbus_data *data)
209{ 208{
210 int i; 209 int i;
210 int status;
211 211
212 switch (size) { 212 switch (size) {
213 case I2C_SMBUS_QUICK: 213 case I2C_SMBUS_QUICK:
@@ -258,8 +258,9 @@ static s32 vt596_access(struct i2c_adapter *adap, u16 addr,
258 258
259 outb_p(((addr & 0x7f) << 1) | read_write, SMBHSTADD); 259 outb_p(((addr & 0x7f) << 1) | read_write, SMBHSTADD);
260 260
261 if (vt596_transaction(size)) /* Error in transaction */ 261 status = vt596_transaction(size);
262 return -1; 262 if (status)
263 return status;
263 264
264 if ((read_write == I2C_SMBUS_WRITE) || (size == VT596_QUICK)) 265 if ((read_write == I2C_SMBUS_WRITE) || (size == VT596_QUICK))
265 return 0; 266 return 0;
@@ -285,9 +286,9 @@ static s32 vt596_access(struct i2c_adapter *adap, u16 addr,
285 return 0; 286 return 0;
286 287
287exit_unsupported: 288exit_unsupported:
288 dev_warn(&vt596_adapter.dev, "Unsupported command invoked! (0x%02x)\n", 289 dev_warn(&vt596_adapter.dev, "Unsupported transaction %d\n",
289 size); 290 size);
290 return -1; 291 return -EOPNOTSUPP;
291} 292}
292 293
293static u32 vt596_func(struct i2c_adapter *adapter) 294static u32 vt596_func(struct i2c_adapter *adapter)
@@ -309,7 +310,7 @@ static const struct i2c_algorithm smbus_algorithm = {
309static struct i2c_adapter vt596_adapter = { 310static struct i2c_adapter vt596_adapter = {
310 .owner = THIS_MODULE, 311 .owner = THIS_MODULE,
311 .id = I2C_HW_SMBUS_VIA2, 312 .id = I2C_HW_SMBUS_VIA2,
312 .class = I2C_CLASS_HWMON, 313 .class = I2C_CLASS_HWMON | I2C_CLASS_SPD,
313 .algo = &smbus_algorithm, 314 .algo = &smbus_algorithm,
314}; 315};
315 316
@@ -354,6 +355,10 @@ static int __devinit vt596_probe(struct pci_dev *pdev,
354 } 355 }
355 356
356found: 357found:
358 error = acpi_check_region(vt596_smba, 8, vt596_driver.name);
359 if (error)
360 return error;
361
357 if (!request_region(vt596_smba, 8, vt596_driver.name)) { 362 if (!request_region(vt596_smba, 8, vt596_driver.name)) {
358 dev_err(&pdev->dev, "SMBus region 0x%x already in use!\n", 363 dev_err(&pdev->dev, "SMBus region 0x%x already in use!\n",
359 vt596_smba); 364 vt596_smba);
diff --git a/drivers/i2c/busses/i2c-voodoo3.c b/drivers/i2c/busses/i2c-voodoo3.c
index 88a3447e11e1..1d4ae26ba73d 100644
--- a/drivers/i2c/busses/i2c-voodoo3.c
+++ b/drivers/i2c/busses/i2c-voodoo3.c
@@ -1,6 +1,4 @@
1/* 1/*
2 voodoo3.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>, 2 Copyright (c) 1998, 1999 Frodo Looijaard <frodol@dds.nl>,
5 Philip Edelbrock <phil@netroedge.com>, 3 Philip Edelbrock <phil@netroedge.com>,
6 Ralph Metzler <rjkm@thp.uni-koeln.de>, and 4 Ralph Metzler <rjkm@thp.uni-koeln.de>, and
diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c
index 61abe0f33255..ed794b145a11 100644
--- a/drivers/i2c/busses/scx200_acb.c
+++ b/drivers/i2c/busses/scx200_acb.c
@@ -442,7 +442,7 @@ static __init struct scx200_acb_iface *scx200_create_iface(const char *text,
442 adapter->owner = THIS_MODULE; 442 adapter->owner = THIS_MODULE;
443 adapter->id = I2C_HW_SMBUS_SCX200; 443 adapter->id = I2C_HW_SMBUS_SCX200;
444 adapter->algo = &scx200_acb_algorithm; 444 adapter->algo = &scx200_acb_algorithm;
445 adapter->class = I2C_CLASS_HWMON; 445 adapter->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
446 adapter->dev.parent = dev; 446 adapter->dev.parent = dev;
447 447
448 mutex_init(&iface->mutex); 448 mutex_init(&iface->mutex);
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index 2da2edfa68ec..50e0a4653741 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -14,6 +14,32 @@ config DS1682
14 This driver can also be built as a module. If so, the module 14 This driver can also be built as a module. If so, the module
15 will be called ds1682. 15 will be called ds1682.
16 16
17config AT24
18 tristate "EEPROMs from most vendors"
19 depends on SYSFS && EXPERIMENTAL
20 help
21 Enable this driver to get read/write support to most I2C EEPROMs,
22 after you configure the driver to know about each EEPROM on
23 your target board. Use these generic chip names, instead of
24 vendor-specific ones like at24c64 or 24lc02:
25
26 24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08,
27 24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024
28
29 Unless you like data loss puzzles, always be sure that any chip
30 you configure as a 24c32 (32 kbit) or larger is NOT really a
31 24c16 (16 kbit) or smaller, and vice versa. Marking the chip
32 as read-only won't help recover from this. Also, if your chip
33 has any software write-protect mechanism you may want to review the
34 code to make sure this driver won't turn it on by accident.
35
36 If you use this with an SMBus adapter instead of an I2C adapter,
37 full functionality is not available. Only smaller devices are
38 supported (24c16 and below, max 4 kByte).
39
40 This driver can also be built as a module. If so, the module
41 will be called at24.
42
17config SENSORS_EEPROM 43config SENSORS_EEPROM
18 tristate "EEPROM reader" 44 tristate "EEPROM reader"
19 depends on EXPERIMENTAL 45 depends on EXPERIMENTAL
@@ -26,8 +52,8 @@ config SENSORS_EEPROM
26 will be called eeprom. 52 will be called eeprom.
27 53
28config SENSORS_PCF8574 54config SENSORS_PCF8574
29 tristate "Philips PCF8574 and PCF8574A" 55 tristate "Philips PCF8574 and PCF8574A (DEPRECATED)"
30 depends on EXPERIMENTAL 56 depends on EXPERIMENTAL && GPIO_PCF857X = "n"
31 default n 57 default n
32 help 58 help
33 If you say yes here you get support for Philips PCF8574 and 59 If you say yes here you get support for Philips PCF8574 and
@@ -36,12 +62,16 @@ config SENSORS_PCF8574
36 This driver can also be built as a module. If so, the module 62 This driver can also be built as a module. If so, the module
37 will be called pcf8574. 63 will be called pcf8574.
38 64
65 This driver is deprecated and will be dropped soon. Use
66 drivers/gpio/pcf857x.c instead.
67
39 These devices are hard to detect and rarely found on mainstream 68 These devices are hard to detect and rarely found on mainstream
40 hardware. If unsure, say N. 69 hardware. If unsure, say N.
41 70
42config PCF8575 71config PCF8575
43 tristate "Philips PCF8575" 72 tristate "Philips PCF8575 (DEPRECATED)"
44 default n 73 default n
74 depends on GPIO_PCF857X = "n"
45 help 75 help
46 If you say yes here you get support for Philips PCF8575 chip. 76 If you say yes here you get support for Philips PCF8575 chip.
47 This chip is a 16-bit I/O expander for the I2C bus. Several other 77 This chip is a 16-bit I/O expander for the I2C bus. Several other
@@ -50,12 +80,15 @@ config PCF8575
50 This driver can also be built as a module. If so, the module 80 This driver can also be built as a module. If so, the module
51 will be called pcf8575. 81 will be called pcf8575.
52 82
83 This driver is deprecated and will be dropped soon. Use
84 drivers/gpio/pcf857x.c instead.
85
53 This device is hard to detect and is rarely found on mainstream 86 This device is hard to detect and is rarely found on mainstream
54 hardware. If unsure, say N. 87 hardware. If unsure, say N.
55 88
56config SENSORS_PCA9539 89config SENSORS_PCA9539
57 tristate "Philips PCA9539 16-bit I/O port (DEPRECATED)" 90 tristate "Philips PCA9539 16-bit I/O port (DEPRECATED)"
58 depends on EXPERIMENTAL && GPIO_PCA9539 = "n" 91 depends on EXPERIMENTAL && GPIO_PCA953X = "n"
59 help 92 help
60 If you say yes here you get support for the Philips PCA9539 93 If you say yes here you get support for the Philips PCA9539
61 16-bit I/O port. 94 16-bit I/O port.
@@ -64,7 +97,7 @@ config SENSORS_PCA9539
64 will be called pca9539. 97 will be called pca9539.
65 98
66 This driver is deprecated and will be dropped soon. Use 99 This driver is deprecated and will be dropped soon. Use
67 drivers/gpio/pca9539.c instead. 100 drivers/gpio/pca953x.c instead.
68 101
69config SENSORS_PCF8591 102config SENSORS_PCF8591
70 tristate "Philips PCF8591" 103 tristate "Philips PCF8591"
diff --git a/drivers/i2c/chips/Makefile b/drivers/i2c/chips/Makefile
index e47aca0ca5ae..39e3e69ed125 100644
--- a/drivers/i2c/chips/Makefile
+++ b/drivers/i2c/chips/Makefile
@@ -10,6 +10,7 @@
10# 10#
11 11
12obj-$(CONFIG_DS1682) += ds1682.o 12obj-$(CONFIG_DS1682) += ds1682.o
13obj-$(CONFIG_AT24) += at24.o
13obj-$(CONFIG_SENSORS_EEPROM) += eeprom.o 14obj-$(CONFIG_SENSORS_EEPROM) += eeprom.o
14obj-$(CONFIG_SENSORS_MAX6875) += max6875.o 15obj-$(CONFIG_SENSORS_MAX6875) += max6875.o
15obj-$(CONFIG_SENSORS_PCA9539) += pca9539.o 16obj-$(CONFIG_SENSORS_PCA9539) += pca9539.o
diff --git a/drivers/i2c/chips/at24.c b/drivers/i2c/chips/at24.c
new file mode 100644
index 000000000000..e764c94f3e3d
--- /dev/null
+++ b/drivers/i2c/chips/at24.c
@@ -0,0 +1,583 @@
1/*
2 * at24.c - handle most I2C EEPROMs
3 *
4 * Copyright (C) 2005-2007 David Brownell
5 * Copyright (C) 2008 Wolfram Sang, Pengutronix
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 */
12#include <linux/kernel.h>
13#include <linux/init.h>
14#include <linux/module.h>
15#include <linux/slab.h>
16#include <linux/delay.h>
17#include <linux/mutex.h>
18#include <linux/sysfs.h>
19#include <linux/mod_devicetable.h>
20#include <linux/log2.h>
21#include <linux/bitops.h>
22#include <linux/jiffies.h>
23#include <linux/i2c.h>
24#include <linux/i2c/at24.h>
25
26/*
27 * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
28 * Differences between different vendor product lines (like Atmel AT24C or
29 * MicroChip 24LC, etc) won't much matter for typical read/write access.
30 * There are also I2C RAM chips, likewise interchangeable. One example
31 * would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes).
32 *
33 * However, misconfiguration can lose data. "Set 16-bit memory address"
34 * to a part with 8-bit addressing will overwrite data. Writing with too
35 * big a page size also loses data. And it's not safe to assume that the
36 * conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC
37 * uses 0x51, for just one example.
38 *
39 * Accordingly, explicit board-specific configuration data should be used
40 * in almost all cases. (One partial exception is an SMBus used to access
41 * "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.)
42 *
43 * So this driver uses "new style" I2C driver binding, expecting to be
44 * told what devices exist. That may be in arch/X/mach-Y/board-Z.c or
45 * similar kernel-resident tables; or, configuration data coming from
46 * a bootloader.
47 *
48 * Other than binding model, current differences from "eeprom" driver are
49 * that this one handles write access and isn't restricted to 24c02 devices.
50 * It also handles larger devices (32 kbit and up) with two-byte addresses,
51 * which won't work on pure SMBus systems.
52 */
53
54struct at24_data {
55 struct at24_platform_data chip;
56 bool use_smbus;
57
58 /*
59 * Lock protects against activities from other Linux tasks,
60 * but not from changes by other I2C masters.
61 */
62 struct mutex lock;
63 struct bin_attribute bin;
64
65 u8 *writebuf;
66 unsigned write_max;
67 unsigned num_addresses;
68
69 /*
70 * Some chips tie up multiple I2C addresses; dummy devices reserve
71 * them for us, and we'll use them with SMBus calls.
72 */
73 struct i2c_client *client[];
74};
75
76/*
77 * This parameter is to help this driver avoid blocking other drivers out
78 * of I2C for potentially troublesome amounts of time. With a 100 kHz I2C
79 * clock, one 256 byte read takes about 1/43 second which is excessive;
80 * but the 1/170 second it takes at 400 kHz may be quite reasonable; and
81 * at 1 MHz (Fm+) a 1/430 second delay could easily be invisible.
82 *
83 * This value is forced to be a power of two so that writes align on pages.
84 */
85static unsigned io_limit = 128;
86module_param(io_limit, uint, 0);
87MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)");
88
89/*
90 * Specs often allow 5 msec for a page write, sometimes 20 msec;
91 * it's important to recover from write timeouts.
92 */
93static unsigned write_timeout = 25;
94module_param(write_timeout, uint, 0);
95MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
96
97#define AT24_SIZE_BYTELEN 5
98#define AT24_SIZE_FLAGS 8
99
100#define AT24_BITMASK(x) (BIT(x) - 1)
101
102/* create non-zero magic value for given eeprom parameters */
103#define AT24_DEVICE_MAGIC(_len, _flags) \
104 ((1 << AT24_SIZE_FLAGS | (_flags)) \
105 << AT24_SIZE_BYTELEN | ilog2(_len))
106
107static const struct i2c_device_id at24_ids[] = {
108 /* needs 8 addresses as A0-A2 are ignored */
109 { "24c00", AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR) },
110 /* old variants can't be handled with this generic entry! */
111 { "24c01", AT24_DEVICE_MAGIC(1024 / 8, 0) },
112 { "24c02", AT24_DEVICE_MAGIC(2048 / 8, 0) },
113 /* spd is a 24c02 in memory DIMMs */
114 { "spd", AT24_DEVICE_MAGIC(2048 / 8,
115 AT24_FLAG_READONLY | AT24_FLAG_IRUGO) },
116 { "24c04", AT24_DEVICE_MAGIC(4096 / 8, 0) },
117 /* 24rf08 quirk is handled at i2c-core */
118 { "24c08", AT24_DEVICE_MAGIC(8192 / 8, 0) },
119 { "24c16", AT24_DEVICE_MAGIC(16384 / 8, 0) },
120 { "24c32", AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16) },
121 { "24c64", AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16) },
122 { "24c128", AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16) },
123 { "24c256", AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16) },
124 { "24c512", AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16) },
125 { "24c1024", AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16) },
126 { "at24", 0 },
127 { /* END OF LIST */ }
128};
129MODULE_DEVICE_TABLE(i2c, at24_ids);
130
131/*-------------------------------------------------------------------------*/
132
133/*
134 * This routine supports chips which consume multiple I2C addresses. It
135 * computes the addressing information to be used for a given r/w request.
136 * Assumes that sanity checks for offset happened at sysfs-layer.
137 */
138static struct i2c_client *at24_translate_offset(struct at24_data *at24,
139 unsigned *offset)
140{
141 unsigned i;
142
143 if (at24->chip.flags & AT24_FLAG_ADDR16) {
144 i = *offset >> 16;
145 *offset &= 0xffff;
146 } else {
147 i = *offset >> 8;
148 *offset &= 0xff;
149 }
150
151 return at24->client[i];
152}
153
154static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf,
155 unsigned offset, size_t count)
156{
157 struct i2c_msg msg[2];
158 u8 msgbuf[2];
159 struct i2c_client *client;
160 int status, i;
161
162 memset(msg, 0, sizeof(msg));
163
164 /*
165 * REVISIT some multi-address chips don't rollover page reads to
166 * the next slave address, so we may need to truncate the count.
167 * Those chips might need another quirk flag.
168 *
169 * If the real hardware used four adjacent 24c02 chips and that
170 * were misconfigured as one 24c08, that would be a similar effect:
171 * one "eeprom" file not four, but larger reads would fail when
172 * they crossed certain pages.
173 */
174
175 /*
176 * Slave address and byte offset derive from the offset. Always
177 * set the byte address; on a multi-master board, another master
178 * may have changed the chip's "current" address pointer.
179 */
180 client = at24_translate_offset(at24, &offset);
181
182 if (count > io_limit)
183 count = io_limit;
184
185 /* Smaller eeproms can work given some SMBus extension calls */
186 if (at24->use_smbus) {
187 if (count > I2C_SMBUS_BLOCK_MAX)
188 count = I2C_SMBUS_BLOCK_MAX;
189 status = i2c_smbus_read_i2c_block_data(client, offset,
190 count, buf);
191 dev_dbg(&client->dev, "smbus read %zd@%d --> %d\n",
192 count, offset, status);
193 return (status < 0) ? -EIO : status;
194 }
195
196 /*
197 * When we have a better choice than SMBus calls, use a combined
198 * I2C message. Write address; then read up to io_limit data bytes.
199 * Note that read page rollover helps us here (unlike writes).
200 * msgbuf is u8 and will cast to our needs.
201 */
202 i = 0;
203 if (at24->chip.flags & AT24_FLAG_ADDR16)
204 msgbuf[i++] = offset >> 8;
205 msgbuf[i++] = offset;
206
207 msg[0].addr = client->addr;
208 msg[0].buf = msgbuf;
209 msg[0].len = i;
210
211 msg[1].addr = client->addr;
212 msg[1].flags = I2C_M_RD;
213 msg[1].buf = buf;
214 msg[1].len = count;
215
216 status = i2c_transfer(client->adapter, msg, 2);
217 dev_dbg(&client->dev, "i2c read %zd@%d --> %d\n",
218 count, offset, status);
219
220 if (status == 2)
221 return count;
222 else if (status >= 0)
223 return -EIO;
224 else
225 return status;
226}
227
228static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr,
229 char *buf, loff_t off, size_t count)
230{
231 struct at24_data *at24;
232 ssize_t retval = 0;
233
234 at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
235
236 if (unlikely(!count))
237 return count;
238
239 /*
240 * Read data from chip, protecting against concurrent updates
241 * from this host, but not from other I2C masters.
242 */
243 mutex_lock(&at24->lock);
244
245 while (count) {
246 ssize_t status;
247
248 status = at24_eeprom_read(at24, buf, off, count);
249 if (status <= 0) {
250 if (retval == 0)
251 retval = status;
252 break;
253 }
254 buf += status;
255 off += status;
256 count -= status;
257 retval += status;
258 }
259
260 mutex_unlock(&at24->lock);
261
262 return retval;
263}
264
265
266/*
267 * REVISIT: export at24_bin{read,write}() to let other kernel code use
268 * eeprom data. For example, it might hold a board's Ethernet address, or
269 * board-specific calibration data generated on the manufacturing floor.
270 */
271
272
273/*
274 * Note that if the hardware write-protect pin is pulled high, the whole
275 * chip is normally write protected. But there are plenty of product
276 * variants here, including OTP fuses and partial chip protect.
277 *
278 * We only use page mode writes; the alternative is sloooow. This routine
279 * writes at most one page.
280 */
281static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf,
282 unsigned offset, size_t count)
283{
284 struct i2c_client *client;
285 struct i2c_msg msg;
286 ssize_t status;
287 unsigned long timeout, write_time;
288 unsigned next_page;
289
290 /* Get corresponding I2C address and adjust offset */
291 client = at24_translate_offset(at24, &offset);
292
293 /* write_max is at most a page */
294 if (count > at24->write_max)
295 count = at24->write_max;
296
297 /* Never roll over backwards, to the start of this page */
298 next_page = roundup(offset + 1, at24->chip.page_size);
299 if (offset + count > next_page)
300 count = next_page - offset;
301
302 /* If we'll use I2C calls for I/O, set up the message */
303 if (!at24->use_smbus) {
304 int i = 0;
305
306 msg.addr = client->addr;
307 msg.flags = 0;
308
309 /* msg.buf is u8 and casts will mask the values */
310 msg.buf = at24->writebuf;
311 if (at24->chip.flags & AT24_FLAG_ADDR16)
312 msg.buf[i++] = offset >> 8;
313
314 msg.buf[i++] = offset;
315 memcpy(&msg.buf[i], buf, count);
316 msg.len = i + count;
317 }
318
319 /*
320 * Writes fail if the previous one didn't complete yet. We may
321 * loop a few times until this one succeeds, waiting at least
322 * long enough for one entire page write to work.
323 */
324 timeout = jiffies + msecs_to_jiffies(write_timeout);
325 do {
326 write_time = jiffies;
327 if (at24->use_smbus) {
328 status = i2c_smbus_write_i2c_block_data(client,
329 offset, count, buf);
330 if (status == 0)
331 status = count;
332 } else {
333 status = i2c_transfer(client->adapter, &msg, 1);
334 if (status == 1)
335 status = count;
336 }
337 dev_dbg(&client->dev, "write %zd@%d --> %zd (%ld)\n",
338 count, offset, status, jiffies);
339
340 if (status == count)
341 return count;
342
343 /* REVISIT: at HZ=100, this is sloooow */
344 msleep(1);
345 } while (time_before(write_time, timeout));
346
347 return -ETIMEDOUT;
348}
349
350static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr,
351 char *buf, loff_t off, size_t count)
352{
353 struct at24_data *at24;
354 ssize_t retval = 0;
355
356 at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
357
358 if (unlikely(!count))
359 return count;
360
361 /*
362 * Write data to chip, protecting against concurrent updates
363 * from this host, but not from other I2C masters.
364 */
365 mutex_lock(&at24->lock);
366
367 while (count) {
368 ssize_t status;
369
370 status = at24_eeprom_write(at24, buf, off, count);
371 if (status <= 0) {
372 if (retval == 0)
373 retval = status;
374 break;
375 }
376 buf += status;
377 off += status;
378 count -= status;
379 retval += status;
380 }
381
382 mutex_unlock(&at24->lock);
383
384 return retval;
385}
386
387/*-------------------------------------------------------------------------*/
388
389static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
390{
391 struct at24_platform_data chip;
392 bool writable;
393 bool use_smbus = false;
394 struct at24_data *at24;
395 int err;
396 unsigned i, num_addresses;
397 kernel_ulong_t magic;
398
399 if (client->dev.platform_data) {
400 chip = *(struct at24_platform_data *)client->dev.platform_data;
401 } else {
402 if (!id->driver_data) {
403 err = -ENODEV;
404 goto err_out;
405 }
406 magic = id->driver_data;
407 chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
408 magic >>= AT24_SIZE_BYTELEN;
409 chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
410 /*
411 * This is slow, but we can't know all eeproms, so we better
412 * play safe. Specifying custom eeprom-types via platform_data
413 * is recommended anyhow.
414 */
415 chip.page_size = 1;
416 }
417
418 if (!is_power_of_2(chip.byte_len))
419 dev_warn(&client->dev,
420 "byte_len looks suspicious (no power of 2)!\n");
421 if (!is_power_of_2(chip.page_size))
422 dev_warn(&client->dev,
423 "page_size looks suspicious (no power of 2)!\n");
424
425 /* Use I2C operations unless we're stuck with SMBus extensions. */
426 if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
427 if (chip.flags & AT24_FLAG_ADDR16) {
428 err = -EPFNOSUPPORT;
429 goto err_out;
430 }
431 if (!i2c_check_functionality(client->adapter,
432 I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
433 err = -EPFNOSUPPORT;
434 goto err_out;
435 }
436 use_smbus = true;
437 }
438
439 if (chip.flags & AT24_FLAG_TAKE8ADDR)
440 num_addresses = 8;
441 else
442 num_addresses = DIV_ROUND_UP(chip.byte_len,
443 (chip.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
444
445 at24 = kzalloc(sizeof(struct at24_data) +
446 num_addresses * sizeof(struct i2c_client *), GFP_KERNEL);
447 if (!at24) {
448 err = -ENOMEM;
449 goto err_out;
450 }
451
452 mutex_init(&at24->lock);
453 at24->use_smbus = use_smbus;
454 at24->chip = chip;
455 at24->num_addresses = num_addresses;
456
457 /*
458 * Export the EEPROM bytes through sysfs, since that's convenient.
459 * By default, only root should see the data (maybe passwords etc)
460 */
461 at24->bin.attr.name = "eeprom";
462 at24->bin.attr.mode = chip.flags & AT24_FLAG_IRUGO ? S_IRUGO : S_IRUSR;
463 at24->bin.attr.owner = THIS_MODULE;
464 at24->bin.read = at24_bin_read;
465 at24->bin.size = chip.byte_len;
466
467 writable = !(chip.flags & AT24_FLAG_READONLY);
468 if (writable) {
469 if (!use_smbus || i2c_check_functionality(client->adapter,
470 I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
471
472 unsigned write_max = chip.page_size;
473
474 at24->bin.write = at24_bin_write;
475 at24->bin.attr.mode |= S_IWUSR;
476
477 if (write_max > io_limit)
478 write_max = io_limit;
479 if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX)
480 write_max = I2C_SMBUS_BLOCK_MAX;
481 at24->write_max = write_max;
482
483 /* buffer (data + address at the beginning) */
484 at24->writebuf = kmalloc(write_max + 2, GFP_KERNEL);
485 if (!at24->writebuf) {
486 err = -ENOMEM;
487 goto err_struct;
488 }
489 } else {
490 dev_warn(&client->dev,
491 "cannot write due to controller restrictions.");
492 }
493 }
494
495 at24->client[0] = client;
496
497 /* use dummy devices for multiple-address chips */
498 for (i = 1; i < num_addresses; i++) {
499 at24->client[i] = i2c_new_dummy(client->adapter,
500 client->addr + i);
501 if (!at24->client[i]) {
502 dev_err(&client->dev, "address 0x%02x unavailable\n",
503 client->addr + i);
504 err = -EADDRINUSE;
505 goto err_clients;
506 }
507 }
508
509 err = sysfs_create_bin_file(&client->dev.kobj, &at24->bin);
510 if (err)
511 goto err_clients;
512
513 i2c_set_clientdata(client, at24);
514
515 dev_info(&client->dev, "%Zd byte %s EEPROM %s\n",
516 at24->bin.size, client->name,
517 writable ? "(writable)" : "(read-only)");
518 dev_dbg(&client->dev,
519 "page_size %d, num_addresses %d, write_max %d%s\n",
520 chip.page_size, num_addresses,
521 at24->write_max,
522 use_smbus ? ", use_smbus" : "");
523
524 return 0;
525
526err_clients:
527 for (i = 1; i < num_addresses; i++)
528 if (at24->client[i])
529 i2c_unregister_device(at24->client[i]);
530
531 kfree(at24->writebuf);
532err_struct:
533 kfree(at24);
534err_out:
535 dev_dbg(&client->dev, "probe error %d\n", err);
536 return err;
537}
538
539static int __devexit at24_remove(struct i2c_client *client)
540{
541 struct at24_data *at24;
542 int i;
543
544 at24 = i2c_get_clientdata(client);
545 sysfs_remove_bin_file(&client->dev.kobj, &at24->bin);
546
547 for (i = 1; i < at24->num_addresses; i++)
548 i2c_unregister_device(at24->client[i]);
549
550 kfree(at24->writebuf);
551 kfree(at24);
552 i2c_set_clientdata(client, NULL);
553 return 0;
554}
555
556/*-------------------------------------------------------------------------*/
557
558static struct i2c_driver at24_driver = {
559 .driver = {
560 .name = "at24",
561 .owner = THIS_MODULE,
562 },
563 .probe = at24_probe,
564 .remove = __devexit_p(at24_remove),
565 .id_table = at24_ids,
566};
567
568static int __init at24_init(void)
569{
570 io_limit = rounddown_pow_of_two(io_limit);
571 return i2c_add_driver(&at24_driver);
572}
573module_init(at24_init);
574
575static void __exit at24_exit(void)
576{
577 i2c_del_driver(&at24_driver);
578}
579module_exit(at24_exit);
580
581MODULE_DESCRIPTION("Driver for most I2C EEPROMs");
582MODULE_AUTHOR("David Brownell and Wolfram Sang");
583MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c
index 7dee001e5133..373ea8d8fe8f 100644
--- a/drivers/i2c/chips/eeprom.c
+++ b/drivers/i2c/chips/eeprom.c
@@ -1,15 +1,9 @@
1/* 1/*
2 eeprom.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl> and 2 Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl> and
5 Philip Edelbrock <phil@netroedge.com> 3 Philip Edelbrock <phil@netroedge.com>
6 Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com> 4 Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
7 Copyright (C) 2003 IBM Corp. 5 Copyright (C) 2003 IBM Corp.
8 6 Copyright (C) 2004 Jean Delvare <khali@linux-fr.org>
9 2004-01-16 Jean Delvare <khali@linux-fr.org>
10 Divide the eeprom in 32-byte (arbitrary) slices. This significantly
11 speeds sensors up, as well as various scripts using the eeprom
12 module.
13 7
14 This program is free software; you can redistribute it and/or modify 8 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by 9 it under the terms of the GNU General Public License as published by
@@ -78,7 +72,7 @@ static struct i2c_driver eeprom_driver = {
78static void eeprom_update_client(struct i2c_client *client, u8 slice) 72static void eeprom_update_client(struct i2c_client *client, u8 slice)
79{ 73{
80 struct eeprom_data *data = i2c_get_clientdata(client); 74 struct eeprom_data *data = i2c_get_clientdata(client);
81 int i, j; 75 int i;
82 76
83 mutex_lock(&data->update_lock); 77 mutex_lock(&data->update_lock);
84 78
@@ -93,15 +87,12 @@ static void eeprom_update_client(struct i2c_client *client, u8 slice)
93 != 32) 87 != 32)
94 goto exit; 88 goto exit;
95 } else { 89 } else {
96 if (i2c_smbus_write_byte(client, slice << 5)) { 90 for (i = slice << 5; i < (slice + 1) << 5; i += 2) {
97 dev_dbg(&client->dev, "eeprom read start has failed!\n"); 91 int word = i2c_smbus_read_word_data(client, i);
98 goto exit; 92 if (word < 0)
99 }
100 for (i = slice << 5; i < (slice + 1) << 5; i++) {
101 j = i2c_smbus_read_byte(client);
102 if (j < 0)
103 goto exit; 93 goto exit;
104 data->data[i] = (u8) j; 94 data->data[i] = word & 0xff;
95 data->data[i + 1] = word >> 8;
105 } 96 }
106 } 97 }
107 data->last_updated[slice] = jiffies; 98 data->last_updated[slice] = jiffies;
@@ -159,24 +150,33 @@ static struct bin_attribute eeprom_attr = {
159 150
160static int eeprom_attach_adapter(struct i2c_adapter *adapter) 151static int eeprom_attach_adapter(struct i2c_adapter *adapter)
161{ 152{
153 if (!(adapter->class & (I2C_CLASS_DDC | I2C_CLASS_SPD)))
154 return 0;
162 return i2c_probe(adapter, &addr_data, eeprom_detect); 155 return i2c_probe(adapter, &addr_data, eeprom_detect);
163} 156}
164 157
165/* This function is called by i2c_probe */ 158/* This function is called by i2c_probe */
166static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind) 159static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind)
167{ 160{
168 struct i2c_client *new_client; 161 struct i2c_client *client;
169 struct eeprom_data *data; 162 struct eeprom_data *data;
170 int err = 0; 163 int err = 0;
171 164
172 /* There are three ways we can read the EEPROM data: 165 /* EDID EEPROMs are often 24C00 EEPROMs, which answer to all
166 addresses 0x50-0x57, but we only care about 0x50. So decline
167 attaching to addresses >= 0x51 on DDC buses */
168 if (!(adapter->class & I2C_CLASS_SPD) && address >= 0x51)
169 goto exit;
170
171 /* There are four ways we can read the EEPROM data:
173 (1) I2C block reads (faster, but unsupported by most adapters) 172 (1) I2C block reads (faster, but unsupported by most adapters)
174 (2) Consecutive byte reads (100% overhead) 173 (2) Word reads (128% overhead)
175 (3) Regular byte data reads (200% overhead) 174 (3) Consecutive byte reads (88% overhead, unsafe)
176 The third method is not implemented by this driver because all 175 (4) Regular byte data reads (265% overhead)
177 known adapters support at least the second. */ 176 The third and fourth methods are not implemented by this driver
178 if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA 177 because all known adapters support one of the first two. */
179 | I2C_FUNC_SMBUS_BYTE)) 178 if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)
179 && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
180 goto exit; 180 goto exit;
181 181
182 if (!(data = kzalloc(sizeof(struct eeprom_data), GFP_KERNEL))) { 182 if (!(data = kzalloc(sizeof(struct eeprom_data), GFP_KERNEL))) {
@@ -184,50 +184,49 @@ static int eeprom_detect(struct i2c_adapter *adapter, int address, int kind)
184 goto exit; 184 goto exit;
185 } 185 }
186 186
187 new_client = &data->client; 187 client = &data->client;
188 memset(data->data, 0xff, EEPROM_SIZE); 188 memset(data->data, 0xff, EEPROM_SIZE);
189 i2c_set_clientdata(new_client, data); 189 i2c_set_clientdata(client, data);
190 new_client->addr = address; 190 client->addr = address;
191 new_client->adapter = adapter; 191 client->adapter = adapter;
192 new_client->driver = &eeprom_driver; 192 client->driver = &eeprom_driver;
193 new_client->flags = 0;
194 193
195 /* Fill in the remaining client fields */ 194 /* Fill in the remaining client fields */
196 strlcpy(new_client->name, "eeprom", I2C_NAME_SIZE); 195 strlcpy(client->name, "eeprom", I2C_NAME_SIZE);
197 data->valid = 0;
198 mutex_init(&data->update_lock); 196 mutex_init(&data->update_lock);
199 data->nature = UNKNOWN; 197 data->nature = UNKNOWN;
200 198
201 /* Tell the I2C layer a new client has arrived */ 199 /* Tell the I2C layer a new client has arrived */
202 if ((err = i2c_attach_client(new_client))) 200 if ((err = i2c_attach_client(client)))
203 goto exit_kfree; 201 goto exit_kfree;
204 202
205 /* Detect the Vaio nature of EEPROMs. 203 /* Detect the Vaio nature of EEPROMs.
206 We use the "PCG-" or "VGN-" prefix as the signature. */ 204 We use the "PCG-" or "VGN-" prefix as the signature. */
207 if (address == 0x57) { 205 if (address == 0x57
206 && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
208 char name[4]; 207 char name[4];
209 208
210 name[0] = i2c_smbus_read_byte_data(new_client, 0x80); 209 name[0] = i2c_smbus_read_byte_data(client, 0x80);
211 name[1] = i2c_smbus_read_byte(new_client); 210 name[1] = i2c_smbus_read_byte_data(client, 0x81);
212 name[2] = i2c_smbus_read_byte(new_client); 211 name[2] = i2c_smbus_read_byte_data(client, 0x82);
213 name[3] = i2c_smbus_read_byte(new_client); 212 name[3] = i2c_smbus_read_byte_data(client, 0x83);
214 213
215 if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) { 214 if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) {
216 dev_info(&new_client->dev, "Vaio EEPROM detected, " 215 dev_info(&client->dev, "Vaio EEPROM detected, "
217 "enabling privacy protection\n"); 216 "enabling privacy protection\n");
218 data->nature = VAIO; 217 data->nature = VAIO;
219 } 218 }
220 } 219 }
221 220
222 /* create the sysfs eeprom file */ 221 /* create the sysfs eeprom file */
223 err = sysfs_create_bin_file(&new_client->dev.kobj, &eeprom_attr); 222 err = sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr);
224 if (err) 223 if (err)
225 goto exit_detach; 224 goto exit_detach;
226 225
227 return 0; 226 return 0;
228 227
229exit_detach: 228exit_detach:
230 i2c_detach_client(new_client); 229 i2c_detach_client(client);
231exit_kfree: 230exit_kfree:
232 kfree(data); 231 kfree(data);
233exit: 232exit:
diff --git a/drivers/i2c/chips/max6875.c b/drivers/i2c/chips/max6875.c
index cf507b3f60f3..5a0285d8b6f9 100644
--- a/drivers/i2c/chips/max6875.c
+++ b/drivers/i2c/chips/max6875.c
@@ -170,7 +170,7 @@ static int max6875_detect(struct i2c_adapter *adapter, int address, int kind)
170 struct i2c_client *real_client; 170 struct i2c_client *real_client;
171 struct i2c_client *fake_client; 171 struct i2c_client *fake_client;
172 struct max6875_data *data; 172 struct max6875_data *data;
173 int err = 0; 173 int err;
174 174
175 if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA 175 if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA
176 | I2C_FUNC_SMBUS_READ_BYTE)) 176 | I2C_FUNC_SMBUS_READ_BYTE))
@@ -195,7 +195,6 @@ static int max6875_detect(struct i2c_adapter *adapter, int address, int kind)
195 real_client->addr = address; 195 real_client->addr = address;
196 real_client->adapter = adapter; 196 real_client->adapter = adapter;
197 real_client->driver = &max6875_driver; 197 real_client->driver = &max6875_driver;
198 real_client->flags = 0;
199 strlcpy(real_client->name, "max6875", I2C_NAME_SIZE); 198 strlcpy(real_client->name, "max6875", I2C_NAME_SIZE);
200 mutex_init(&data->update_lock); 199 mutex_init(&data->update_lock);
201 200
@@ -204,7 +203,6 @@ static int max6875_detect(struct i2c_adapter *adapter, int address, int kind)
204 fake_client->addr = address | 1; 203 fake_client->addr = address | 1;
205 fake_client->adapter = adapter; 204 fake_client->adapter = adapter;
206 fake_client->driver = &max6875_driver; 205 fake_client->driver = &max6875_driver;
207 fake_client->flags = 0;
208 strlcpy(fake_client->name, "max6875 subclient", I2C_NAME_SIZE); 206 strlcpy(fake_client->name, "max6875 subclient", I2C_NAME_SIZE);
209 207
210 if ((err = i2c_attach_client(real_client)) != 0) 208 if ((err = i2c_attach_client(real_client)) != 0)
diff --git a/drivers/i2c/chips/pca9539.c b/drivers/i2c/chips/pca9539.c
index f43c4e79b55e..58ab7f26be26 100644
--- a/drivers/i2c/chips/pca9539.c
+++ b/drivers/i2c/chips/pca9539.c
@@ -113,7 +113,7 @@ static int pca9539_attach_adapter(struct i2c_adapter *adapter)
113/* This function is called by i2c_probe */ 113/* This function is called by i2c_probe */
114static int pca9539_detect(struct i2c_adapter *adapter, int address, int kind) 114static int pca9539_detect(struct i2c_adapter *adapter, int address, int kind)
115{ 115{
116 struct i2c_client *new_client; 116 struct i2c_client *client;
117 struct pca9539_data *data; 117 struct pca9539_data *data;
118 int err = 0; 118 int err = 0;
119 119
@@ -127,29 +127,28 @@ static int pca9539_detect(struct i2c_adapter *adapter, int address, int kind)
127 goto exit; 127 goto exit;
128 } 128 }
129 129
130 new_client = &data->client; 130 client = &data->client;
131 i2c_set_clientdata(new_client, data); 131 i2c_set_clientdata(client, data);
132 new_client->addr = address; 132 client->addr = address;
133 new_client->adapter = adapter; 133 client->adapter = adapter;
134 new_client->driver = &pca9539_driver; 134 client->driver = &pca9539_driver;
135 new_client->flags = 0;
136 135
137 if (kind < 0) { 136 if (kind < 0) {
138 /* Detection: the pca9539 only has 8 registers (0-7). 137 /* Detection: the pca9539 only has 8 registers (0-7).
139 A read of 7 should succeed, but a read of 8 should fail. */ 138 A read of 7 should succeed, but a read of 8 should fail. */
140 if ((i2c_smbus_read_byte_data(new_client, 7) < 0) || 139 if ((i2c_smbus_read_byte_data(client, 7) < 0) ||
141 (i2c_smbus_read_byte_data(new_client, 8) >= 0)) 140 (i2c_smbus_read_byte_data(client, 8) >= 0))
142 goto exit_kfree; 141 goto exit_kfree;
143 } 142 }
144 143
145 strlcpy(new_client->name, "pca9539", I2C_NAME_SIZE); 144 strlcpy(client->name, "pca9539", I2C_NAME_SIZE);
146 145
147 /* Tell the I2C layer a new client has arrived */ 146 /* Tell the I2C layer a new client has arrived */
148 if ((err = i2c_attach_client(new_client))) 147 if ((err = i2c_attach_client(client)))
149 goto exit_kfree; 148 goto exit_kfree;
150 149
151 /* Register sysfs hooks */ 150 /* Register sysfs hooks */
152 err = sysfs_create_group(&new_client->dev.kobj, 151 err = sysfs_create_group(&client->dev.kobj,
153 &pca9539_defattr_group); 152 &pca9539_defattr_group);
154 if (err) 153 if (err)
155 goto exit_detach; 154 goto exit_detach;
@@ -157,7 +156,7 @@ static int pca9539_detect(struct i2c_adapter *adapter, int address, int kind)
157 return 0; 156 return 0;
158 157
159exit_detach: 158exit_detach:
160 i2c_detach_client(new_client); 159 i2c_detach_client(client);
161exit_kfree: 160exit_kfree:
162 kfree(data); 161 kfree(data);
163exit: 162exit:
diff --git a/drivers/i2c/chips/pcf8574.c b/drivers/i2c/chips/pcf8574.c
index e5b31329b56e..1b3db2b3ada9 100644
--- a/drivers/i2c/chips/pcf8574.c
+++ b/drivers/i2c/chips/pcf8574.c
@@ -1,6 +1,4 @@
1/* 1/*
2 pcf8574.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (c) 2000 Frodo Looijaard <frodol@dds.nl>, 2 Copyright (c) 2000 Frodo Looijaard <frodol@dds.nl>,
5 Philip Edelbrock <phil@netroedge.com>, 3 Philip Edelbrock <phil@netroedge.com>,
6 Dan Eaton <dan.eaton@rocketlogix.com> 4 Dan Eaton <dan.eaton@rocketlogix.com>
@@ -129,7 +127,7 @@ static int pcf8574_attach_adapter(struct i2c_adapter *adapter)
129/* This function is called by i2c_probe */ 127/* This function is called by i2c_probe */
130static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind) 128static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind)
131{ 129{
132 struct i2c_client *new_client; 130 struct i2c_client *client;
133 struct pcf8574_data *data; 131 struct pcf8574_data *data;
134 int err = 0; 132 int err = 0;
135 const char *client_name = ""; 133 const char *client_name = "";
@@ -144,12 +142,11 @@ static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind)
144 goto exit; 142 goto exit;
145 } 143 }
146 144
147 new_client = &data->client; 145 client = &data->client;
148 i2c_set_clientdata(new_client, data); 146 i2c_set_clientdata(client, data);
149 new_client->addr = address; 147 client->addr = address;
150 new_client->adapter = adapter; 148 client->adapter = adapter;
151 new_client->driver = &pcf8574_driver; 149 client->driver = &pcf8574_driver;
152 new_client->flags = 0;
153 150
154 /* Now, we would do the remaining detection. But the PCF8574 is plainly 151 /* Now, we would do the remaining detection. But the PCF8574 is plainly
155 impossible to detect! Stupid chip. */ 152 impossible to detect! Stupid chip. */
@@ -168,23 +165,23 @@ static int pcf8574_detect(struct i2c_adapter *adapter, int address, int kind)
168 client_name = "pcf8574"; 165 client_name = "pcf8574";
169 166
170 /* Fill in the remaining client fields and put it into the global list */ 167 /* Fill in the remaining client fields and put it into the global list */
171 strlcpy(new_client->name, client_name, I2C_NAME_SIZE); 168 strlcpy(client->name, client_name, I2C_NAME_SIZE);
172 169
173 /* Tell the I2C layer a new client has arrived */ 170 /* Tell the I2C layer a new client has arrived */
174 if ((err = i2c_attach_client(new_client))) 171 if ((err = i2c_attach_client(client)))
175 goto exit_free; 172 goto exit_free;
176 173
177 /* Initialize the PCF8574 chip */ 174 /* Initialize the PCF8574 chip */
178 pcf8574_init_client(new_client); 175 pcf8574_init_client(client);
179 176
180 /* Register sysfs hooks */ 177 /* Register sysfs hooks */
181 err = sysfs_create_group(&new_client->dev.kobj, &pcf8574_attr_group); 178 err = sysfs_create_group(&client->dev.kobj, &pcf8574_attr_group);
182 if (err) 179 if (err)
183 goto exit_detach; 180 goto exit_detach;
184 return 0; 181 return 0;
185 182
186 exit_detach: 183 exit_detach:
187 i2c_detach_client(new_client); 184 i2c_detach_client(client);
188 exit_free: 185 exit_free:
189 kfree(data); 186 kfree(data);
190 exit: 187 exit:
diff --git a/drivers/i2c/chips/pcf8591.c b/drivers/i2c/chips/pcf8591.c
index 66c7c3bb9429..db735379f22f 100644
--- a/drivers/i2c/chips/pcf8591.c
+++ b/drivers/i2c/chips/pcf8591.c
@@ -1,6 +1,4 @@
1/* 1/*
2 pcf8591.c - Part of lm_sensors, Linux kernel modules for hardware
3 monitoring
4 Copyright (C) 2001-2004 Aurelien Jarno <aurelien@aurel32.net> 2 Copyright (C) 2001-2004 Aurelien Jarno <aurelien@aurel32.net>
5 Ported to Linux 2.6 by Aurelien Jarno <aurelien@aurel32.net> with 3 Ported to Linux 2.6 by Aurelien Jarno <aurelien@aurel32.net> with
6 the help of Jean Delvare <khali@linux-fr.org> 4 the help of Jean Delvare <khali@linux-fr.org>
@@ -190,7 +188,7 @@ static int pcf8591_attach_adapter(struct i2c_adapter *adapter)
190/* This function is called by i2c_probe */ 188/* This function is called by i2c_probe */
191static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind) 189static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind)
192{ 190{
193 struct i2c_client *new_client; 191 struct i2c_client *client;
194 struct pcf8591_data *data; 192 struct pcf8591_data *data;
195 int err = 0; 193 int err = 0;
196 194
@@ -205,12 +203,11 @@ static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind)
205 goto exit; 203 goto exit;
206 } 204 }
207 205
208 new_client = &data->client; 206 client = &data->client;
209 i2c_set_clientdata(new_client, data); 207 i2c_set_clientdata(client, data);
210 new_client->addr = address; 208 client->addr = address;
211 new_client->adapter = adapter; 209 client->adapter = adapter;
212 new_client->driver = &pcf8591_driver; 210 client->driver = &pcf8591_driver;
213 new_client->flags = 0;
214 211
215 /* Now, we would do the remaining detection. But the PCF8591 is plainly 212 /* Now, we would do the remaining detection. But the PCF8591 is plainly
216 impossible to detect! Stupid chip. */ 213 impossible to detect! Stupid chip. */
@@ -221,31 +218,31 @@ static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind)
221 218
222 /* Fill in the remaining client fields and put it into the global 219 /* Fill in the remaining client fields and put it into the global
223 list */ 220 list */
224 strlcpy(new_client->name, "pcf8591", I2C_NAME_SIZE); 221 strlcpy(client->name, "pcf8591", I2C_NAME_SIZE);
225 mutex_init(&data->update_lock); 222 mutex_init(&data->update_lock);
226 223
227 /* Tell the I2C layer a new client has arrived */ 224 /* Tell the I2C layer a new client has arrived */
228 if ((err = i2c_attach_client(new_client))) 225 if ((err = i2c_attach_client(client)))
229 goto exit_kfree; 226 goto exit_kfree;
230 227
231 /* Initialize the PCF8591 chip */ 228 /* Initialize the PCF8591 chip */
232 pcf8591_init_client(new_client); 229 pcf8591_init_client(client);
233 230
234 /* Register sysfs hooks */ 231 /* Register sysfs hooks */
235 err = sysfs_create_group(&new_client->dev.kobj, &pcf8591_attr_group); 232 err = sysfs_create_group(&client->dev.kobj, &pcf8591_attr_group);
236 if (err) 233 if (err)
237 goto exit_detach; 234 goto exit_detach;
238 235
239 /* Register input2 if not in "two differential inputs" mode */ 236 /* Register input2 if not in "two differential inputs" mode */
240 if (input_mode != 3) { 237 if (input_mode != 3) {
241 if ((err = device_create_file(&new_client->dev, 238 if ((err = device_create_file(&client->dev,
242 &dev_attr_in2_input))) 239 &dev_attr_in2_input)))
243 goto exit_sysfs_remove; 240 goto exit_sysfs_remove;
244 } 241 }
245 242
246 /* Register input3 only in "four single ended inputs" mode */ 243 /* Register input3 only in "four single ended inputs" mode */
247 if (input_mode == 0) { 244 if (input_mode == 0) {
248 if ((err = device_create_file(&new_client->dev, 245 if ((err = device_create_file(&client->dev,
249 &dev_attr_in3_input))) 246 &dev_attr_in3_input)))
250 goto exit_sysfs_remove; 247 goto exit_sysfs_remove;
251 } 248 }
@@ -253,10 +250,10 @@ static int pcf8591_detect(struct i2c_adapter *adapter, int address, int kind)
253 return 0; 250 return 0;
254 251
255exit_sysfs_remove: 252exit_sysfs_remove:
256 sysfs_remove_group(&new_client->dev.kobj, &pcf8591_attr_group_opt); 253 sysfs_remove_group(&client->dev.kobj, &pcf8591_attr_group_opt);
257 sysfs_remove_group(&new_client->dev.kobj, &pcf8591_attr_group); 254 sysfs_remove_group(&client->dev.kobj, &pcf8591_attr_group);
258exit_detach: 255exit_detach:
259 i2c_detach_client(new_client); 256 i2c_detach_client(client);
260exit_kfree: 257exit_kfree:
261 kfree(data); 258 kfree(data);
262exit: 259exit:
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index d0175f4f8fc6..0a79f7661017 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -29,13 +29,11 @@
29#include <linux/i2c.h> 29#include <linux/i2c.h>
30#include <linux/init.h> 30#include <linux/init.h>
31#include <linux/idr.h> 31#include <linux/idr.h>
32#include <linux/seq_file.h>
33#include <linux/platform_device.h> 32#include <linux/platform_device.h>
34#include <linux/mutex.h> 33#include <linux/mutex.h>
35#include <linux/completion.h> 34#include <linux/completion.h>
36#include <linux/hardirq.h> 35#include <linux/hardirq.h>
37#include <linux/irqflags.h> 36#include <linux/irqflags.h>
38#include <linux/semaphore.h>
39#include <asm/uaccess.h> 37#include <asm/uaccess.h>
40 38
41#include "i2c-core.h" 39#include "i2c-core.h"
@@ -44,7 +42,9 @@
44static DEFINE_MUTEX(core_lock); 42static DEFINE_MUTEX(core_lock);
45static DEFINE_IDR(i2c_adapter_idr); 43static DEFINE_IDR(i2c_adapter_idr);
46 44
47#define is_newstyle_driver(d) ((d)->probe || (d)->remove) 45#define is_newstyle_driver(d) ((d)->probe || (d)->remove || (d)->detect)
46
47static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
48 48
49/* ------------------------------------------------------------------------- */ 49/* ------------------------------------------------------------------------- */
50 50
@@ -103,19 +103,14 @@ static int i2c_device_probe(struct device *dev)
103{ 103{
104 struct i2c_client *client = to_i2c_client(dev); 104 struct i2c_client *client = to_i2c_client(dev);
105 struct i2c_driver *driver = to_i2c_driver(dev->driver); 105 struct i2c_driver *driver = to_i2c_driver(dev->driver);
106 const struct i2c_device_id *id;
107 int status; 106 int status;
108 107
109 if (!driver->probe) 108 if (!driver->probe || !driver->id_table)
110 return -ENODEV; 109 return -ENODEV;
111 client->driver = driver; 110 client->driver = driver;
112 dev_dbg(dev, "probe\n"); 111 dev_dbg(dev, "probe\n");
113 112
114 if (driver->id_table) 113 status = driver->probe(client, i2c_match_id(driver->id_table, client));
115 id = i2c_match_id(driver->id_table, client);
116 else
117 id = NULL;
118 status = driver->probe(client, id);
119 if (status) 114 if (status)
120 client->driver = NULL; 115 client->driver = NULL;
121 return status; 116 return status;
@@ -208,7 +203,7 @@ static struct device_attribute i2c_dev_attrs[] = {
208 { }, 203 { },
209}; 204};
210 205
211static struct bus_type i2c_bus_type = { 206struct bus_type i2c_bus_type = {
212 .name = "i2c", 207 .name = "i2c",
213 .dev_attrs = i2c_dev_attrs, 208 .dev_attrs = i2c_dev_attrs,
214 .match = i2c_device_match, 209 .match = i2c_device_match,
@@ -219,6 +214,7 @@ static struct bus_type i2c_bus_type = {
219 .suspend = i2c_device_suspend, 214 .suspend = i2c_device_suspend,
220 .resume = i2c_device_resume, 215 .resume = i2c_device_resume,
221}; 216};
217EXPORT_SYMBOL_GPL(i2c_bus_type);
222 218
223 219
224/** 220/**
@@ -306,6 +302,14 @@ void i2c_unregister_device(struct i2c_client *client)
306 return; 302 return;
307 } 303 }
308 304
305 if (adapter->client_unregister) {
306 if (adapter->client_unregister(client)) {
307 dev_warn(&client->dev,
308 "client_unregister [%s] failed\n",
309 client->name);
310 }
311 }
312
309 mutex_lock(&adapter->clist_lock); 313 mutex_lock(&adapter->clist_lock);
310 list_del(&client->list); 314 list_del(&client->list);
311 mutex_unlock(&adapter->clist_lock); 315 mutex_unlock(&adapter->clist_lock);
@@ -416,6 +420,10 @@ static int i2c_do_add_adapter(struct device_driver *d, void *data)
416 struct i2c_driver *driver = to_i2c_driver(d); 420 struct i2c_driver *driver = to_i2c_driver(d);
417 struct i2c_adapter *adap = data; 421 struct i2c_adapter *adap = data;
418 422
423 /* Detect supported devices on that bus, and instantiate them */
424 i2c_detect(adap, driver);
425
426 /* Let legacy drivers scan this bus for matching devices */
419 if (driver->attach_adapter) { 427 if (driver->attach_adapter) {
420 /* We ignore the return code; if it fails, too bad */ 428 /* We ignore the return code; if it fails, too bad */
421 driver->attach_adapter(adap); 429 driver->attach_adapter(adap);
@@ -455,7 +463,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
455 if (adap->nr < __i2c_first_dynamic_bus_num) 463 if (adap->nr < __i2c_first_dynamic_bus_num)
456 i2c_scan_static_board_info(adap); 464 i2c_scan_static_board_info(adap);
457 465
458 /* let legacy drivers scan this bus for matching devices */ 466 /* Notify drivers */
459 dummy = bus_for_each_drv(&i2c_bus_type, NULL, adap, 467 dummy = bus_for_each_drv(&i2c_bus_type, NULL, adap,
460 i2c_do_add_adapter); 468 i2c_do_add_adapter);
461 469
@@ -561,8 +569,19 @@ static int i2c_do_del_adapter(struct device_driver *d, void *data)
561{ 569{
562 struct i2c_driver *driver = to_i2c_driver(d); 570 struct i2c_driver *driver = to_i2c_driver(d);
563 struct i2c_adapter *adapter = data; 571 struct i2c_adapter *adapter = data;
572 struct i2c_client *client, *_n;
564 int res; 573 int res;
565 574
575 /* Remove the devices we created ourselves */
576 list_for_each_entry_safe(client, _n, &driver->clients, detected) {
577 if (client->adapter == adapter) {
578 dev_dbg(&adapter->dev, "Removing %s at 0x%x\n",
579 client->name, client->addr);
580 list_del(&client->detected);
581 i2c_unregister_device(client);
582 }
583 }
584
566 if (!driver->detach_adapter) 585 if (!driver->detach_adapter)
567 return 0; 586 return 0;
568 res = driver->detach_adapter(adapter); 587 res = driver->detach_adapter(adapter);
@@ -582,8 +601,7 @@ static int i2c_do_del_adapter(struct device_driver *d, void *data)
582 */ 601 */
583int i2c_del_adapter(struct i2c_adapter *adap) 602int i2c_del_adapter(struct i2c_adapter *adap)
584{ 603{
585 struct list_head *item, *_n; 604 struct i2c_client *client, *_n;
586 struct i2c_client *client;
587 int res = 0; 605 int res = 0;
588 606
589 mutex_lock(&core_lock); 607 mutex_lock(&core_lock);
@@ -604,10 +622,9 @@ int i2c_del_adapter(struct i2c_adapter *adap)
604 622
605 /* detach any active clients. This must be done first, because 623 /* detach any active clients. This must be done first, because
606 * it can fail; in which case we give up. */ 624 * it can fail; in which case we give up. */
607 list_for_each_safe(item, _n, &adap->clients) { 625 list_for_each_entry_safe(client, _n, &adap->clients, list) {
608 struct i2c_driver *driver; 626 struct i2c_driver *driver;
609 627
610 client = list_entry(item, struct i2c_client, list);
611 driver = client->driver; 628 driver = client->driver;
612 629
613 /* new style, follow standard driver model */ 630 /* new style, follow standard driver model */
@@ -646,6 +663,20 @@ EXPORT_SYMBOL(i2c_del_adapter);
646 663
647/* ------------------------------------------------------------------------- */ 664/* ------------------------------------------------------------------------- */
648 665
666static int __attach_adapter(struct device *dev, void *data)
667{
668 struct i2c_adapter *adapter = to_i2c_adapter(dev);
669 struct i2c_driver *driver = data;
670
671 i2c_detect(adapter, driver);
672
673 /* Legacy drivers scan i2c busses directly */
674 if (driver->attach_adapter)
675 driver->attach_adapter(adapter);
676
677 return 0;
678}
679
649/* 680/*
650 * An i2c_driver is used with one or more i2c_client (device) nodes to access 681 * An i2c_driver is used with one or more i2c_client (device) nodes to access
651 * i2c slave chips, on a bus instance associated with some i2c_adapter. There 682 * i2c slave chips, on a bus instance associated with some i2c_adapter. There
@@ -685,72 +716,70 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
685 716
686 pr_debug("i2c-core: driver [%s] registered\n", driver->driver.name); 717 pr_debug("i2c-core: driver [%s] registered\n", driver->driver.name);
687 718
688 /* legacy drivers scan i2c busses directly */ 719 INIT_LIST_HEAD(&driver->clients);
689 if (driver->attach_adapter) { 720 /* Walk the adapters that are already present */
690 struct i2c_adapter *adapter; 721 class_for_each_device(&i2c_adapter_class, driver, __attach_adapter);
691
692 down(&i2c_adapter_class.sem);
693 list_for_each_entry(adapter, &i2c_adapter_class.devices,
694 dev.node) {
695 driver->attach_adapter(adapter);
696 }
697 up(&i2c_adapter_class.sem);
698 }
699 722
700 mutex_unlock(&core_lock); 723 mutex_unlock(&core_lock);
701 return 0; 724 return 0;
702} 725}
703EXPORT_SYMBOL(i2c_register_driver); 726EXPORT_SYMBOL(i2c_register_driver);
704 727
705/** 728static int __detach_adapter(struct device *dev, void *data)
706 * i2c_del_driver - unregister I2C driver
707 * @driver: the driver being unregistered
708 * Context: can sleep
709 */
710void i2c_del_driver(struct i2c_driver *driver)
711{ 729{
712 struct list_head *item2, *_n; 730 struct i2c_adapter *adapter = to_i2c_adapter(dev);
713 struct i2c_client *client; 731 struct i2c_driver *driver = data;
714 struct i2c_adapter *adap; 732 struct i2c_client *client, *_n;
715 733
716 mutex_lock(&core_lock); 734 list_for_each_entry_safe(client, _n, &driver->clients, detected) {
735 dev_dbg(&adapter->dev, "Removing %s at 0x%x\n",
736 client->name, client->addr);
737 list_del(&client->detected);
738 i2c_unregister_device(client);
739 }
717 740
718 /* new-style driver? */
719 if (is_newstyle_driver(driver)) 741 if (is_newstyle_driver(driver))
720 goto unregister; 742 return 0;
721 743
722 /* Have a look at each adapter, if clients of this driver are still 744 /* Have a look at each adapter, if clients of this driver are still
723 * attached. If so, detach them to be able to kill the driver 745 * attached. If so, detach them to be able to kill the driver
724 * afterwards. 746 * afterwards.
725 */ 747 */
726 down(&i2c_adapter_class.sem); 748 if (driver->detach_adapter) {
727 list_for_each_entry(adap, &i2c_adapter_class.devices, dev.node) { 749 if (driver->detach_adapter(adapter))
728 if (driver->detach_adapter) { 750 dev_err(&adapter->dev,
729 if (driver->detach_adapter(adap)) { 751 "detach_adapter failed for driver [%s]\n",
730 dev_err(&adap->dev, "detach_adapter failed " 752 driver->driver.name);
731 "for driver [%s]\n", 753 } else {
732 driver->driver.name); 754 struct i2c_client *client, *_n;
733 } 755
734 } else { 756 list_for_each_entry_safe(client, _n, &adapter->clients, list) {
735 list_for_each_safe(item2, _n, &adap->clients) { 757 if (client->driver != driver)
736 client = list_entry(item2, struct i2c_client, list); 758 continue;
737 if (client->driver != driver) 759 dev_dbg(&adapter->dev,
738 continue; 760 "detaching client [%s] at 0x%02x\n",
739 dev_dbg(&adap->dev, "detaching client [%s] " 761 client->name, client->addr);
740 "at 0x%02x\n", client->name, 762 if (driver->detach_client(client))
741 client->addr); 763 dev_err(&adapter->dev, "detach_client "
742 if (driver->detach_client(client)) { 764 "failed for client [%s] at 0x%02x\n",
743 dev_err(&adap->dev, "detach_client " 765 client->name, client->addr);
744 "failed for client [%s] at "
745 "0x%02x\n", client->name,
746 client->addr);
747 }
748 }
749 } 766 }
750 } 767 }
751 up(&i2c_adapter_class.sem);
752 768
753 unregister: 769 return 0;
770}
771
772/**
773 * i2c_del_driver - unregister I2C driver
774 * @driver: the driver being unregistered
775 * Context: can sleep
776 */
777void i2c_del_driver(struct i2c_driver *driver)
778{
779 mutex_lock(&core_lock);
780
781 class_for_each_device(&i2c_adapter_class, driver, __detach_adapter);
782
754 driver_unregister(&driver->driver); 783 driver_unregister(&driver->driver);
755 pr_debug("i2c-core: driver [%s] unregistered\n", driver->driver.name); 784 pr_debug("i2c-core: driver [%s] unregistered\n", driver->driver.name);
756 785
@@ -863,8 +892,9 @@ EXPORT_SYMBOL(i2c_detach_client);
863 */ 892 */
864struct i2c_client *i2c_use_client(struct i2c_client *client) 893struct i2c_client *i2c_use_client(struct i2c_client *client)
865{ 894{
866 get_device(&client->dev); 895 if (client && get_device(&client->dev))
867 return client; 896 return client;
897 return NULL;
868} 898}
869EXPORT_SYMBOL(i2c_use_client); 899EXPORT_SYMBOL(i2c_use_client);
870 900
@@ -876,7 +906,8 @@ EXPORT_SYMBOL(i2c_use_client);
876 */ 906 */
877void i2c_release_client(struct i2c_client *client) 907void i2c_release_client(struct i2c_client *client)
878{ 908{
879 put_device(&client->dev); 909 if (client)
910 put_device(&client->dev);
880} 911}
881EXPORT_SYMBOL(i2c_release_client); 912EXPORT_SYMBOL(i2c_release_client);
882 913
@@ -942,10 +973,39 @@ module_exit(i2c_exit);
942 * ---------------------------------------------------- 973 * ----------------------------------------------------
943 */ 974 */
944 975
976/**
977 * i2c_transfer - execute a single or combined I2C message
978 * @adap: Handle to I2C bus
979 * @msgs: One or more messages to execute before STOP is issued to
980 * terminate the operation; each message begins with a START.
981 * @num: Number of messages to be executed.
982 *
983 * Returns negative errno, else the number of messages executed.
984 *
985 * Note that there is no requirement that each message be sent to
986 * the same slave address, although that is the most common model.
987 */
945int i2c_transfer(struct i2c_adapter * adap, struct i2c_msg *msgs, int num) 988int i2c_transfer(struct i2c_adapter * adap, struct i2c_msg *msgs, int num)
946{ 989{
947 int ret; 990 int ret;
948 991
992 /* REVISIT the fault reporting model here is weak:
993 *
994 * - When we get an error after receiving N bytes from a slave,
995 * there is no way to report "N".
996 *
997 * - When we get a NAK after transmitting N bytes to a slave,
998 * there is no way to report "N" ... or to let the master
999 * continue executing the rest of this combined message, if
1000 * that's the appropriate response.
1001 *
1002 * - When for example "num" is two and we successfully complete
1003 * the first message but get an error part way through the
1004 * second, it's unclear whether that should be reported as
1005 * one (discarding status on the second message) or errno
1006 * (discarding status on the first one).
1007 */
1008
949 if (adap->algo->master_xfer) { 1009 if (adap->algo->master_xfer) {
950#ifdef DEBUG 1010#ifdef DEBUG
951 for (ret = 0; ret < num; ret++) { 1011 for (ret = 0; ret < num; ret++) {
@@ -971,11 +1031,19 @@ int i2c_transfer(struct i2c_adapter * adap, struct i2c_msg *msgs, int num)
971 return ret; 1031 return ret;
972 } else { 1032 } else {
973 dev_dbg(&adap->dev, "I2C level transfers not supported\n"); 1033 dev_dbg(&adap->dev, "I2C level transfers not supported\n");
974 return -ENOSYS; 1034 return -EOPNOTSUPP;
975 } 1035 }
976} 1036}
977EXPORT_SYMBOL(i2c_transfer); 1037EXPORT_SYMBOL(i2c_transfer);
978 1038
1039/**
1040 * i2c_master_send - issue a single I2C message in master transmit mode
1041 * @client: Handle to slave device
1042 * @buf: Data that will be written to the slave
1043 * @count: How many bytes to write
1044 *
1045 * Returns negative errno, or else the number of bytes written.
1046 */
979int i2c_master_send(struct i2c_client *client,const char *buf ,int count) 1047int i2c_master_send(struct i2c_client *client,const char *buf ,int count)
980{ 1048{
981 int ret; 1049 int ret;
@@ -995,6 +1063,14 @@ int i2c_master_send(struct i2c_client *client,const char *buf ,int count)
995} 1063}
996EXPORT_SYMBOL(i2c_master_send); 1064EXPORT_SYMBOL(i2c_master_send);
997 1065
1066/**
1067 * i2c_master_recv - issue a single I2C message in master receive mode
1068 * @client: Handle to slave device
1069 * @buf: Where to store data read from slave
1070 * @count: How many bytes to read
1071 *
1072 * Returns negative errno, or else the number of bytes read.
1073 */
998int i2c_master_recv(struct i2c_client *client, char *buf ,int count) 1074int i2c_master_recv(struct i2c_client *client, char *buf ,int count)
999{ 1075{
1000 struct i2c_adapter *adap=client->adapter; 1076 struct i2c_adapter *adap=client->adapter;
@@ -1103,7 +1179,7 @@ int i2c_probe(struct i2c_adapter *adapter,
1103 1179
1104 dev_warn(&adapter->dev, "SMBus Quick command not supported, " 1180 dev_warn(&adapter->dev, "SMBus Quick command not supported, "
1105 "can't probe for chips\n"); 1181 "can't probe for chips\n");
1106 return -1; 1182 return -EOPNOTSUPP;
1107 } 1183 }
1108 1184
1109 /* Probe entries are done second, and are not affected by ignore 1185 /* Probe entries are done second, and are not affected by ignore
@@ -1157,6 +1233,179 @@ int i2c_probe(struct i2c_adapter *adapter,
1157} 1233}
1158EXPORT_SYMBOL(i2c_probe); 1234EXPORT_SYMBOL(i2c_probe);
1159 1235
1236/* Separate detection function for new-style drivers */
1237static int i2c_detect_address(struct i2c_client *temp_client, int kind,
1238 struct i2c_driver *driver)
1239{
1240 struct i2c_board_info info;
1241 struct i2c_adapter *adapter = temp_client->adapter;
1242 int addr = temp_client->addr;
1243 int err;
1244
1245 /* Make sure the address is valid */
1246 if (addr < 0x03 || addr > 0x77) {
1247 dev_warn(&adapter->dev, "Invalid probe address 0x%02x\n",
1248 addr);
1249 return -EINVAL;
1250 }
1251
1252 /* Skip if already in use */
1253 if (i2c_check_addr(adapter, addr))
1254 return 0;
1255
1256 /* Make sure there is something at this address, unless forced */
1257 if (kind < 0) {
1258 if (i2c_smbus_xfer(adapter, addr, 0, 0, 0,
1259 I2C_SMBUS_QUICK, NULL) < 0)
1260 return 0;
1261
1262 /* prevent 24RF08 corruption */
1263 if ((addr & ~0x0f) == 0x50)
1264 i2c_smbus_xfer(adapter, addr, 0, 0, 0,
1265 I2C_SMBUS_QUICK, NULL);
1266 }
1267
1268 /* Finally call the custom detection function */
1269 memset(&info, 0, sizeof(struct i2c_board_info));
1270 info.addr = addr;
1271 err = driver->detect(temp_client, kind, &info);
1272 if (err) {
1273 /* -ENODEV is returned if the detection fails. We catch it
1274 here as this isn't an error. */
1275 return err == -ENODEV ? 0 : err;
1276 }
1277
1278 /* Consistency check */
1279 if (info.type[0] == '\0') {
1280 dev_err(&adapter->dev, "%s detection function provided "
1281 "no name for 0x%x\n", driver->driver.name,
1282 addr);
1283 } else {
1284 struct i2c_client *client;
1285
1286 /* Detection succeeded, instantiate the device */
1287 dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n",
1288 info.type, info.addr);
1289 client = i2c_new_device(adapter, &info);
1290 if (client)
1291 list_add_tail(&client->detected, &driver->clients);
1292 else
1293 dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n",
1294 info.type, info.addr);
1295 }
1296 return 0;
1297}
1298
1299static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
1300{
1301 const struct i2c_client_address_data *address_data;
1302 struct i2c_client *temp_client;
1303 int i, err = 0;
1304 int adap_id = i2c_adapter_id(adapter);
1305
1306 address_data = driver->address_data;
1307 if (!driver->detect || !address_data)
1308 return 0;
1309
1310 /* Set up a temporary client to help detect callback */
1311 temp_client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL);
1312 if (!temp_client)
1313 return -ENOMEM;
1314 temp_client->adapter = adapter;
1315
1316 /* Force entries are done first, and are not affected by ignore
1317 entries */
1318 if (address_data->forces) {
1319 const unsigned short * const *forces = address_data->forces;
1320 int kind;
1321
1322 for (kind = 0; forces[kind]; kind++) {
1323 for (i = 0; forces[kind][i] != I2C_CLIENT_END;
1324 i += 2) {
1325 if (forces[kind][i] == adap_id
1326 || forces[kind][i] == ANY_I2C_BUS) {
1327 dev_dbg(&adapter->dev, "found force "
1328 "parameter for adapter %d, "
1329 "addr 0x%02x, kind %d\n",
1330 adap_id, forces[kind][i + 1],
1331 kind);
1332 temp_client->addr = forces[kind][i + 1];
1333 err = i2c_detect_address(temp_client,
1334 kind, driver);
1335 if (err)
1336 goto exit_free;
1337 }
1338 }
1339 }
1340 }
1341
1342 /* Stop here if we can't use SMBUS_QUICK */
1343 if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK)) {
1344 if (address_data->probe[0] == I2C_CLIENT_END
1345 && address_data->normal_i2c[0] == I2C_CLIENT_END)
1346 goto exit_free;
1347
1348 dev_warn(&adapter->dev, "SMBus Quick command not supported, "
1349 "can't probe for chips\n");
1350 err = -EOPNOTSUPP;
1351 goto exit_free;
1352 }
1353
1354 /* Stop here if the classes do not match */
1355 if (!(adapter->class & driver->class))
1356 goto exit_free;
1357
1358 /* Probe entries are done second, and are not affected by ignore
1359 entries either */
1360 for (i = 0; address_data->probe[i] != I2C_CLIENT_END; i += 2) {
1361 if (address_data->probe[i] == adap_id
1362 || address_data->probe[i] == ANY_I2C_BUS) {
1363 dev_dbg(&adapter->dev, "found probe parameter for "
1364 "adapter %d, addr 0x%02x\n", adap_id,
1365 address_data->probe[i + 1]);
1366 temp_client->addr = address_data->probe[i + 1];
1367 err = i2c_detect_address(temp_client, -1, driver);
1368 if (err)
1369 goto exit_free;
1370 }
1371 }
1372
1373 /* Normal entries are done last, unless shadowed by an ignore entry */
1374 for (i = 0; address_data->normal_i2c[i] != I2C_CLIENT_END; i += 1) {
1375 int j, ignore;
1376
1377 ignore = 0;
1378 for (j = 0; address_data->ignore[j] != I2C_CLIENT_END;
1379 j += 2) {
1380 if ((address_data->ignore[j] == adap_id ||
1381 address_data->ignore[j] == ANY_I2C_BUS)
1382 && address_data->ignore[j + 1]
1383 == address_data->normal_i2c[i]) {
1384 dev_dbg(&adapter->dev, "found ignore "
1385 "parameter for adapter %d, "
1386 "addr 0x%02x\n", adap_id,
1387 address_data->ignore[j + 1]);
1388 ignore = 1;
1389 break;
1390 }
1391 }
1392 if (ignore)
1393 continue;
1394
1395 dev_dbg(&adapter->dev, "found normal entry for adapter %d, "
1396 "addr 0x%02x\n", adap_id,
1397 address_data->normal_i2c[i]);
1398 temp_client->addr = address_data->normal_i2c[i];
1399 err = i2c_detect_address(temp_client, -1, driver);
1400 if (err)
1401 goto exit_free;
1402 }
1403
1404 exit_free:
1405 kfree(temp_client);
1406 return err;
1407}
1408
1160struct i2c_client * 1409struct i2c_client *
1161i2c_new_probed_device(struct i2c_adapter *adap, 1410i2c_new_probed_device(struct i2c_adapter *adap,
1162 struct i2c_board_info *info, 1411 struct i2c_board_info *info,
@@ -1295,29 +1544,38 @@ static int i2c_smbus_check_pec(u8 cpec, struct i2c_msg *msg)
1295 if (rpec != cpec) { 1544 if (rpec != cpec) {
1296 pr_debug("i2c-core: Bad PEC 0x%02x vs. 0x%02x\n", 1545 pr_debug("i2c-core: Bad PEC 0x%02x vs. 0x%02x\n",
1297 rpec, cpec); 1546 rpec, cpec);
1298 return -1; 1547 return -EBADMSG;
1299 } 1548 }
1300 return 0; 1549 return 0;
1301} 1550}
1302 1551
1303s32 i2c_smbus_write_quick(struct i2c_client *client, u8 value) 1552/**
1304{ 1553 * i2c_smbus_read_byte - SMBus "receive byte" protocol
1305 return i2c_smbus_xfer(client->adapter,client->addr,client->flags, 1554 * @client: Handle to slave device
1306 value,0,I2C_SMBUS_QUICK,NULL); 1555 *
1307} 1556 * This executes the SMBus "receive byte" protocol, returning negative errno
1308EXPORT_SYMBOL(i2c_smbus_write_quick); 1557 * else the byte received from the device.
1309 1558 */
1310s32 i2c_smbus_read_byte(struct i2c_client *client) 1559s32 i2c_smbus_read_byte(struct i2c_client *client)
1311{ 1560{
1312 union i2c_smbus_data data; 1561 union i2c_smbus_data data;
1313 if (i2c_smbus_xfer(client->adapter,client->addr,client->flags, 1562 int status;
1314 I2C_SMBUS_READ,0,I2C_SMBUS_BYTE, &data)) 1563
1315 return -1; 1564 status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
1316 else 1565 I2C_SMBUS_READ, 0,
1317 return data.byte; 1566 I2C_SMBUS_BYTE, &data);
1567 return (status < 0) ? status : data.byte;
1318} 1568}
1319EXPORT_SYMBOL(i2c_smbus_read_byte); 1569EXPORT_SYMBOL(i2c_smbus_read_byte);
1320 1570
1571/**
1572 * i2c_smbus_write_byte - SMBus "send byte" protocol
1573 * @client: Handle to slave device
1574 * @value: Byte to be sent
1575 *
1576 * This executes the SMBus "send byte" protocol, returning negative errno
1577 * else zero on success.
1578 */
1321s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value) 1579s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value)
1322{ 1580{
1323 return i2c_smbus_xfer(client->adapter,client->addr,client->flags, 1581 return i2c_smbus_xfer(client->adapter,client->addr,client->flags,
@@ -1325,17 +1583,35 @@ s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value)
1325} 1583}
1326EXPORT_SYMBOL(i2c_smbus_write_byte); 1584EXPORT_SYMBOL(i2c_smbus_write_byte);
1327 1585
1586/**
1587 * i2c_smbus_read_byte_data - SMBus "read byte" protocol
1588 * @client: Handle to slave device
1589 * @command: Byte interpreted by slave
1590 *
1591 * This executes the SMBus "read byte" protocol, returning negative errno
1592 * else a data byte received from the device.
1593 */
1328s32 i2c_smbus_read_byte_data(struct i2c_client *client, u8 command) 1594s32 i2c_smbus_read_byte_data(struct i2c_client *client, u8 command)
1329{ 1595{
1330 union i2c_smbus_data data; 1596 union i2c_smbus_data data;
1331 if (i2c_smbus_xfer(client->adapter,client->addr,client->flags, 1597 int status;
1332 I2C_SMBUS_READ,command, I2C_SMBUS_BYTE_DATA,&data)) 1598
1333 return -1; 1599 status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
1334 else 1600 I2C_SMBUS_READ, command,
1335 return data.byte; 1601 I2C_SMBUS_BYTE_DATA, &data);
1602 return (status < 0) ? status : data.byte;
1336} 1603}
1337EXPORT_SYMBOL(i2c_smbus_read_byte_data); 1604EXPORT_SYMBOL(i2c_smbus_read_byte_data);
1338 1605
1606/**
1607 * i2c_smbus_write_byte_data - SMBus "write byte" protocol
1608 * @client: Handle to slave device
1609 * @command: Byte interpreted by slave
1610 * @value: Byte being written
1611 *
1612 * This executes the SMBus "write byte" protocol, returning negative errno
1613 * else zero on success.
1614 */
1339s32 i2c_smbus_write_byte_data(struct i2c_client *client, u8 command, u8 value) 1615s32 i2c_smbus_write_byte_data(struct i2c_client *client, u8 command, u8 value)
1340{ 1616{
1341 union i2c_smbus_data data; 1617 union i2c_smbus_data data;
@@ -1346,17 +1622,35 @@ s32 i2c_smbus_write_byte_data(struct i2c_client *client, u8 command, u8 value)
1346} 1622}
1347EXPORT_SYMBOL(i2c_smbus_write_byte_data); 1623EXPORT_SYMBOL(i2c_smbus_write_byte_data);
1348 1624
1625/**
1626 * i2c_smbus_read_word_data - SMBus "read word" protocol
1627 * @client: Handle to slave device
1628 * @command: Byte interpreted by slave
1629 *
1630 * This executes the SMBus "read word" protocol, returning negative errno
1631 * else a 16-bit unsigned "word" received from the device.
1632 */
1349s32 i2c_smbus_read_word_data(struct i2c_client *client, u8 command) 1633s32 i2c_smbus_read_word_data(struct i2c_client *client, u8 command)
1350{ 1634{
1351 union i2c_smbus_data data; 1635 union i2c_smbus_data data;
1352 if (i2c_smbus_xfer(client->adapter,client->addr,client->flags, 1636 int status;
1353 I2C_SMBUS_READ,command, I2C_SMBUS_WORD_DATA, &data)) 1637
1354 return -1; 1638 status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
1355 else 1639 I2C_SMBUS_READ, command,
1356 return data.word; 1640 I2C_SMBUS_WORD_DATA, &data);
1641 return (status < 0) ? status : data.word;
1357} 1642}
1358EXPORT_SYMBOL(i2c_smbus_read_word_data); 1643EXPORT_SYMBOL(i2c_smbus_read_word_data);
1359 1644
1645/**
1646 * i2c_smbus_write_word_data - SMBus "write word" protocol
1647 * @client: Handle to slave device
1648 * @command: Byte interpreted by slave
1649 * @value: 16-bit "word" being written
1650 *
1651 * This executes the SMBus "write word" protocol, returning negative errno
1652 * else zero on success.
1653 */
1360s32 i2c_smbus_write_word_data(struct i2c_client *client, u8 command, u16 value) 1654s32 i2c_smbus_write_word_data(struct i2c_client *client, u8 command, u16 value)
1361{ 1655{
1362 union i2c_smbus_data data; 1656 union i2c_smbus_data data;
@@ -1368,15 +1662,14 @@ s32 i2c_smbus_write_word_data(struct i2c_client *client, u8 command, u16 value)
1368EXPORT_SYMBOL(i2c_smbus_write_word_data); 1662EXPORT_SYMBOL(i2c_smbus_write_word_data);
1369 1663
1370/** 1664/**
1371 * i2c_smbus_read_block_data - SMBus block read request 1665 * i2c_smbus_read_block_data - SMBus "block read" protocol
1372 * @client: Handle to slave device 1666 * @client: Handle to slave device
1373 * @command: Command byte issued to let the slave know what data should 1667 * @command: Byte interpreted by slave
1374 * be returned
1375 * @values: Byte array into which data will be read; big enough to hold 1668 * @values: Byte array into which data will be read; big enough to hold
1376 * the data returned by the slave. SMBus allows at most 32 bytes. 1669 * the data returned by the slave. SMBus allows at most 32 bytes.
1377 * 1670 *
1378 * Returns the number of bytes read in the slave's response, else a 1671 * This executes the SMBus "block read" protocol, returning negative errno
1379 * negative number to indicate some kind of error. 1672 * else the number of data bytes in the slave's response.
1380 * 1673 *
1381 * Note that using this function requires that the client's adapter support 1674 * Note that using this function requires that the client's adapter support
1382 * the I2C_FUNC_SMBUS_READ_BLOCK_DATA functionality. Not all adapter drivers 1675 * the I2C_FUNC_SMBUS_READ_BLOCK_DATA functionality. Not all adapter drivers
@@ -1387,17 +1680,29 @@ s32 i2c_smbus_read_block_data(struct i2c_client *client, u8 command,
1387 u8 *values) 1680 u8 *values)
1388{ 1681{
1389 union i2c_smbus_data data; 1682 union i2c_smbus_data data;
1683 int status;
1390 1684
1391 if (i2c_smbus_xfer(client->adapter, client->addr, client->flags, 1685 status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
1392 I2C_SMBUS_READ, command, 1686 I2C_SMBUS_READ, command,
1393 I2C_SMBUS_BLOCK_DATA, &data)) 1687 I2C_SMBUS_BLOCK_DATA, &data);
1394 return -1; 1688 if (status)
1689 return status;
1395 1690
1396 memcpy(values, &data.block[1], data.block[0]); 1691 memcpy(values, &data.block[1], data.block[0]);
1397 return data.block[0]; 1692 return data.block[0];
1398} 1693}
1399EXPORT_SYMBOL(i2c_smbus_read_block_data); 1694EXPORT_SYMBOL(i2c_smbus_read_block_data);
1400 1695
1696/**
1697 * i2c_smbus_write_block_data - SMBus "block write" protocol
1698 * @client: Handle to slave device
1699 * @command: Byte interpreted by slave
1700 * @length: Size of data block; SMBus allows at most 32 bytes
1701 * @values: Byte array which will be written.
1702 *
1703 * This executes the SMBus "block write" protocol, returning negative errno
1704 * else zero on success.
1705 */
1401s32 i2c_smbus_write_block_data(struct i2c_client *client, u8 command, 1706s32 i2c_smbus_write_block_data(struct i2c_client *client, u8 command,
1402 u8 length, const u8 *values) 1707 u8 length, const u8 *values)
1403{ 1708{
@@ -1418,14 +1723,16 @@ s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client, u8 command,
1418 u8 length, u8 *values) 1723 u8 length, u8 *values)
1419{ 1724{
1420 union i2c_smbus_data data; 1725 union i2c_smbus_data data;
1726 int status;
1421 1727
1422 if (length > I2C_SMBUS_BLOCK_MAX) 1728 if (length > I2C_SMBUS_BLOCK_MAX)
1423 length = I2C_SMBUS_BLOCK_MAX; 1729 length = I2C_SMBUS_BLOCK_MAX;
1424 data.block[0] = length; 1730 data.block[0] = length;
1425 if (i2c_smbus_xfer(client->adapter,client->addr,client->flags, 1731 status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
1426 I2C_SMBUS_READ,command, 1732 I2C_SMBUS_READ, command,
1427 I2C_SMBUS_I2C_BLOCK_DATA,&data)) 1733 I2C_SMBUS_I2C_BLOCK_DATA, &data);
1428 return -1; 1734 if (status < 0)
1735 return status;
1429 1736
1430 memcpy(values, &data.block[1], data.block[0]); 1737 memcpy(values, &data.block[1], data.block[0]);
1431 return data.block[0]; 1738 return data.block[0];
@@ -1466,6 +1773,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
1466 }; 1773 };
1467 int i; 1774 int i;
1468 u8 partial_pec = 0; 1775 u8 partial_pec = 0;
1776 int status;
1469 1777
1470 msgbuf0[0] = command; 1778 msgbuf0[0] = command;
1471 switch(size) { 1779 switch(size) {
@@ -1515,10 +1823,10 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
1515 } else { 1823 } else {
1516 msg[0].len = data->block[0] + 2; 1824 msg[0].len = data->block[0] + 2;
1517 if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) { 1825 if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) {
1518 dev_err(&adapter->dev, "smbus_access called with " 1826 dev_err(&adapter->dev,
1519 "invalid block write size (%d)\n", 1827 "Invalid block write size %d\n",
1520 data->block[0]); 1828 data->block[0]);
1521 return -1; 1829 return -EINVAL;
1522 } 1830 }
1523 for (i = 1; i < msg[0].len; i++) 1831 for (i = 1; i < msg[0].len; i++)
1524 msgbuf0[i] = data->block[i-1]; 1832 msgbuf0[i] = data->block[i-1];
@@ -1528,10 +1836,10 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
1528 num = 2; /* Another special case */ 1836 num = 2; /* Another special case */
1529 read_write = I2C_SMBUS_READ; 1837 read_write = I2C_SMBUS_READ;
1530 if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { 1838 if (data->block[0] > I2C_SMBUS_BLOCK_MAX) {
1531 dev_err(&adapter->dev, "%s called with invalid " 1839 dev_err(&adapter->dev,
1532 "block proc call size (%d)\n", __func__, 1840 "Invalid block write size %d\n",
1533 data->block[0]); 1841 data->block[0]);
1534 return -1; 1842 return -EINVAL;
1535 } 1843 }
1536 msg[0].len = data->block[0] + 2; 1844 msg[0].len = data->block[0] + 2;
1537 for (i = 1; i < msg[0].len; i++) 1845 for (i = 1; i < msg[0].len; i++)
@@ -1546,19 +1854,18 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
1546 } else { 1854 } else {
1547 msg[0].len = data->block[0] + 1; 1855 msg[0].len = data->block[0] + 1;
1548 if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) { 1856 if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) {
1549 dev_err(&adapter->dev, "i2c_smbus_xfer_emulated called with " 1857 dev_err(&adapter->dev,
1550 "invalid block write size (%d)\n", 1858 "Invalid block write size %d\n",
1551 data->block[0]); 1859 data->block[0]);
1552 return -1; 1860 return -EINVAL;
1553 } 1861 }
1554 for (i = 1; i <= data->block[0]; i++) 1862 for (i = 1; i <= data->block[0]; i++)
1555 msgbuf0[i] = data->block[i]; 1863 msgbuf0[i] = data->block[i];
1556 } 1864 }
1557 break; 1865 break;
1558 default: 1866 default:
1559 dev_err(&adapter->dev, "smbus_access called with invalid size (%d)\n", 1867 dev_err(&adapter->dev, "Unsupported transaction %d\n", size);
1560 size); 1868 return -EOPNOTSUPP;
1561 return -1;
1562 } 1869 }
1563 1870
1564 i = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK 1871 i = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK
@@ -1576,13 +1883,15 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
1576 msg[num-1].len++; 1883 msg[num-1].len++;
1577 } 1884 }
1578 1885
1579 if (i2c_transfer(adapter, msg, num) < 0) 1886 status = i2c_transfer(adapter, msg, num);
1580 return -1; 1887 if (status < 0)
1888 return status;
1581 1889
1582 /* Check PEC if last message is a read */ 1890 /* Check PEC if last message is a read */
1583 if (i && (msg[num-1].flags & I2C_M_RD)) { 1891 if (i && (msg[num-1].flags & I2C_M_RD)) {
1584 if (i2c_smbus_check_pec(partial_pec, &msg[num-1]) < 0) 1892 status = i2c_smbus_check_pec(partial_pec, &msg[num-1]);
1585 return -1; 1893 if (status < 0)
1894 return status;
1586 } 1895 }
1587 1896
1588 if (read_write == I2C_SMBUS_READ) 1897 if (read_write == I2C_SMBUS_READ)
@@ -1610,9 +1919,21 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter * adapter, u16 addr,
1610 return 0; 1919 return 0;
1611} 1920}
1612 1921
1613 1922/**
1923 * i2c_smbus_xfer - execute SMBus protocol operations
1924 * @adapter: Handle to I2C bus
1925 * @addr: Address of SMBus slave on that bus
1926 * @flags: I2C_CLIENT_* flags (usually zero or I2C_CLIENT_PEC)
1927 * @read_write: I2C_SMBUS_READ or I2C_SMBUS_WRITE
1928 * @command: Byte interpreted by slave, for protocols which use such bytes
1929 * @protocol: SMBus protocol operation to execute, such as I2C_SMBUS_PROC_CALL
1930 * @data: Data to be read or written
1931 *
1932 * This executes an SMBus protocol operation, and returns a negative
1933 * errno code else zero on success.
1934 */
1614s32 i2c_smbus_xfer(struct i2c_adapter * adapter, u16 addr, unsigned short flags, 1935s32 i2c_smbus_xfer(struct i2c_adapter * adapter, u16 addr, unsigned short flags,
1615 char read_write, u8 command, int size, 1936 char read_write, u8 command, int protocol,
1616 union i2c_smbus_data * data) 1937 union i2c_smbus_data * data)
1617{ 1938{
1618 s32 res; 1939 s32 res;
@@ -1622,11 +1943,11 @@ s32 i2c_smbus_xfer(struct i2c_adapter * adapter, u16 addr, unsigned short flags,
1622 if (adapter->algo->smbus_xfer) { 1943 if (adapter->algo->smbus_xfer) {
1623 mutex_lock(&adapter->bus_lock); 1944 mutex_lock(&adapter->bus_lock);
1624 res = adapter->algo->smbus_xfer(adapter,addr,flags,read_write, 1945 res = adapter->algo->smbus_xfer(adapter,addr,flags,read_write,
1625 command,size,data); 1946 command, protocol, data);
1626 mutex_unlock(&adapter->bus_lock); 1947 mutex_unlock(&adapter->bus_lock);
1627 } else 1948 } else
1628 res = i2c_smbus_xfer_emulated(adapter,addr,flags,read_write, 1949 res = i2c_smbus_xfer_emulated(adapter,addr,flags,read_write,
1629 command,size,data); 1950 command, protocol, data);
1630 1951
1631 return res; 1952 return res;
1632} 1953}
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 006a5857256a..86727fa8858f 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -367,8 +367,7 @@ static noinline int i2cdev_ioctl_smbus(struct i2c_client *client,
367 return res; 367 return res;
368} 368}
369 369
370static int i2cdev_ioctl(struct inode *inode, struct file *file, 370static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
371 unsigned int cmd, unsigned long arg)
372{ 371{
373 struct i2c_client *client = (struct i2c_client *)file->private_data; 372 struct i2c_client *client = (struct i2c_client *)file->private_data;
374 unsigned long funcs; 373 unsigned long funcs;
@@ -497,7 +496,7 @@ static const struct file_operations i2cdev_fops = {
497 .llseek = no_llseek, 496 .llseek = no_llseek,
498 .read = i2cdev_read, 497 .read = i2cdev_read,
499 .write = i2cdev_write, 498 .write = i2cdev_write,
500 .ioctl = i2cdev_ioctl, 499 .unlocked_ioctl = i2cdev_ioctl,
501 .open = i2cdev_open, 500 .open = i2cdev_open,
502 .release = i2cdev_release, 501 .release = i2cdev_release,
503}; 502};
@@ -559,19 +558,12 @@ static int i2cdev_detach_adapter(struct i2c_adapter *adap)
559 return 0; 558 return 0;
560} 559}
561 560
562static int i2cdev_detach_client(struct i2c_client *client)
563{
564 return 0;
565}
566
567static struct i2c_driver i2cdev_driver = { 561static struct i2c_driver i2cdev_driver = {
568 .driver = { 562 .driver = {
569 .name = "dev_driver", 563 .name = "dev_driver",
570 }, 564 },
571 .id = I2C_DRIVERID_I2CDEV,
572 .attach_adapter = i2cdev_attach_adapter, 565 .attach_adapter = i2cdev_attach_adapter,
573 .detach_adapter = i2cdev_detach_adapter, 566 .detach_adapter = i2cdev_detach_adapter,
574 .detach_client = i2cdev_detach_client,
575}; 567};
576 568
577/* ------------------------------------------------------------------------- */ 569/* ------------------------------------------------------------------------- */
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 1607536ff5fb..cf707c8f08d4 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -98,6 +98,9 @@ if BLK_DEV_IDE
98 98
99comment "Please see Documentation/ide/ide.txt for help/info on IDE drives" 99comment "Please see Documentation/ide/ide.txt for help/info on IDE drives"
100 100
101config IDE_ATAPI
102 bool
103
101config BLK_DEV_IDE_SATA 104config BLK_DEV_IDE_SATA
102 bool "Support for SATA (deprecated; conflicts with libata SATA driver)" 105 bool "Support for SATA (deprecated; conflicts with libata SATA driver)"
103 default n 106 default n
@@ -201,6 +204,7 @@ config BLK_DEV_IDECD_VERBOSE_ERRORS
201 204
202config BLK_DEV_IDETAPE 205config BLK_DEV_IDETAPE
203 tristate "Include IDE/ATAPI TAPE support" 206 tristate "Include IDE/ATAPI TAPE support"
207 select IDE_ATAPI
204 help 208 help
205 If you have an IDE tape drive using the ATAPI protocol, say Y. 209 If you have an IDE tape drive using the ATAPI protocol, say Y.
206 ATAPI is a newer protocol used by IDE tape and CD-ROM drives, 210 ATAPI is a newer protocol used by IDE tape and CD-ROM drives,
@@ -223,6 +227,7 @@ config BLK_DEV_IDETAPE
223 227
224config BLK_DEV_IDEFLOPPY 228config BLK_DEV_IDEFLOPPY
225 tristate "Include IDE/ATAPI FLOPPY support" 229 tristate "Include IDE/ATAPI FLOPPY support"
230 select IDE_ATAPI
226 ---help--- 231 ---help---
227 If you have an IDE floppy drive which uses the ATAPI protocol, 232 If you have an IDE floppy drive which uses the ATAPI protocol,
228 answer Y. ATAPI is a newer protocol used by IDE CD-ROM/tape/floppy 233 answer Y. ATAPI is a newer protocol used by IDE CD-ROM/tape/floppy
@@ -246,6 +251,7 @@ config BLK_DEV_IDEFLOPPY
246config BLK_DEV_IDESCSI 251config BLK_DEV_IDESCSI
247 tristate "SCSI emulation support" 252 tristate "SCSI emulation support"
248 depends on SCSI 253 depends on SCSI
254 select IDE_ATAPI
249 ---help--- 255 ---help---
250 WARNING: ide-scsi is no longer needed for cd writing applications! 256 WARNING: ide-scsi is no longer needed for cd writing applications!
251 The 2.6 kernel supports direct writing to ide-cd, which eliminates 257 The 2.6 kernel supports direct writing to ide-cd, which eliminates
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index f94b679b611e..a2b3f84d710d 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -14,6 +14,7 @@ EXTRA_CFLAGS += -Idrivers/ide
14ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o 14ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o
15 15
16# core IDE code 16# core IDE code
17ide-core-$(CONFIG_IDE_ATAPI) += ide-atapi.o
17ide-core-$(CONFIG_BLK_DEV_IDEPCI) += setup-pci.o 18ide-core-$(CONFIG_BLK_DEV_IDEPCI) += setup-pci.o
18ide-core-$(CONFIG_BLK_DEV_IDEDMA) += ide-dma.o 19ide-core-$(CONFIG_BLK_DEV_IDEDMA) += ide-dma.o
19ide-core-$(CONFIG_IDE_PROC_FS) += ide-proc.o 20ide-core-$(CONFIG_IDE_PROC_FS) += ide-proc.o
diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c
index 2f2b4f4cf229..3839f5722985 100644
--- a/drivers/ide/arm/palm_bk3710.c
+++ b/drivers/ide/arm/palm_bk3710.c
@@ -83,7 +83,7 @@ static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = {
83 {125, 160}, /* UDMA Mode 1 */ 83 {125, 160}, /* UDMA Mode 1 */
84 {100, 120}, /* UDMA Mode 2 */ 84 {100, 120}, /* UDMA Mode 2 */
85 {100, 90}, /* UDMA Mode 3 */ 85 {100, 90}, /* UDMA Mode 3 */
86 {85, 60}, /* UDMA Mode 4 */ 86 {100, 60}, /* UDMA Mode 4 */
87}; 87};
88 88
89static void palm_bk3710_setudmamode(void __iomem *base, unsigned int dev, 89static void palm_bk3710_setudmamode(void __iomem *base, unsigned int dev,
@@ -405,7 +405,6 @@ static int __devinit palm_bk3710_probe(struct platform_device *pdev)
405 ide_init_port_data(hwif, i); 405 ide_init_port_data(hwif, i);
406 ide_init_port_hw(hwif, &hw); 406 ide_init_port_hw(hwif, &hw);
407 407
408 hwif->mmio = 1;
409 default_hwif_mmiops(hwif); 408 default_hwif_mmiops(hwif);
410 409
411 idx[0] = i; 410 idx[0] = i;
diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c
index ecf53bb0d2aa..ae37ee58bae2 100644
--- a/drivers/ide/h8300/ide-h8300.c
+++ b/drivers/ide/h8300/ide-h8300.c
@@ -52,8 +52,6 @@ static void h8300_tf_load(ide_drive_t *drive, ide_task_t *task)
52 if (task->tf_flags & IDE_TFLAG_FLAGGED) 52 if (task->tf_flags & IDE_TFLAG_FLAGGED)
53 HIHI = 0xFF; 53 HIHI = 0xFF;
54 54
55 ide_set_irq(drive, 1);
56
57 if (task->tf_flags & IDE_TFLAG_OUT_DATA) 55 if (task->tf_flags & IDE_TFLAG_OUT_DATA)
58 mm_outw((tf->hob_data << 8) | tf->data, io_ports->data_addr); 56 mm_outw((tf->hob_data << 8) | tf->data, io_ports->data_addr);
59 57
@@ -98,7 +96,7 @@ static void h8300_tf_read(ide_drive_t *drive, ide_task_t *task)
98 } 96 }
99 97
100 /* be sure we're looking at the low order bits */ 98 /* be sure we're looking at the low order bits */
101 outb(drive->ctl & ~0x80, io_ports->ctl_addr); 99 outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr);
102 100
103 if (task->tf_flags & IDE_TFLAG_IN_NSECT) 101 if (task->tf_flags & IDE_TFLAG_IN_NSECT)
104 tf->nsect = inb(io_ports->nsect_addr); 102 tf->nsect = inb(io_ports->nsect_addr);
@@ -112,7 +110,7 @@ static void h8300_tf_read(ide_drive_t *drive, ide_task_t *task)
112 tf->device = inb(io_ports->device_addr); 110 tf->device = inb(io_ports->device_addr);
113 111
114 if (task->tf_flags & IDE_TFLAG_LBA48) { 112 if (task->tf_flags & IDE_TFLAG_LBA48) {
115 outb(drive->ctl | 0x80, io_ports->ctl_addr); 113 outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr);
116 114
117 if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) 115 if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE)
118 tf->hob_feature = inb(io_ports->feature_addr); 116 tf->hob_feature = inb(io_ports->feature_addr);
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index 9d3601fa5680..6f704628c27d 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -60,15 +60,15 @@ struct ide_acpi_hwif_link {
60#define DEBPRINT(fmt, args...) do {} while (0) 60#define DEBPRINT(fmt, args...) do {} while (0)
61#endif /* DEBUGGING */ 61#endif /* DEBUGGING */
62 62
63int ide_noacpi; 63static int ide_noacpi;
64module_param_named(noacpi, ide_noacpi, bool, 0); 64module_param_named(noacpi, ide_noacpi, bool, 0);
65MODULE_PARM_DESC(noacpi, "disable IDE ACPI support"); 65MODULE_PARM_DESC(noacpi, "disable IDE ACPI support");
66 66
67int ide_acpigtf; 67static int ide_acpigtf;
68module_param_named(acpigtf, ide_acpigtf, bool, 0); 68module_param_named(acpigtf, ide_acpigtf, bool, 0);
69MODULE_PARM_DESC(acpigtf, "enable IDE ACPI _GTF support"); 69MODULE_PARM_DESC(acpigtf, "enable IDE ACPI _GTF support");
70 70
71int ide_acpionboot; 71static int ide_acpionboot;
72module_param_named(acpionboot, ide_acpionboot, bool, 0); 72module_param_named(acpionboot, ide_acpionboot, bool, 0);
73MODULE_PARM_DESC(acpionboot, "call IDE ACPI methods on boot"); 73MODULE_PARM_DESC(acpionboot, "call IDE ACPI methods on boot");
74 74
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
new file mode 100644
index 000000000000..2802031de670
--- /dev/null
+++ b/drivers/ide/ide-atapi.c
@@ -0,0 +1,296 @@
1/*
2 * ATAPI support.
3 */
4
5#include <linux/kernel.h>
6#include <linux/delay.h>
7#include <linux/ide.h>
8#include <scsi/scsi.h>
9
10#ifdef DEBUG
11#define debug_log(fmt, args...) \
12 printk(KERN_INFO "ide: " fmt, ## args)
13#else
14#define debug_log(fmt, args...) do {} while (0)
15#endif
16
17/* TODO: unify the code thus making some arguments go away */
18ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
19 ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry,
20 void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *),
21 void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *),
22 void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned, int))
23{
24 ide_hwif_t *hwif = drive->hwif;
25 xfer_func_t *xferfunc;
26 unsigned int temp;
27 u16 bcount;
28 u8 stat, ireason, scsi = drive->scsi;
29
30 debug_log("Enter %s - interrupt handler\n", __func__);
31
32 if (pc->flags & PC_FLAG_TIMEDOUT) {
33 pc->callback(drive);
34 return ide_stopped;
35 }
36
37 /* Clear the interrupt */
38 stat = ide_read_status(drive);
39
40 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
41 if (hwif->dma_ops->dma_end(drive) ||
42 (drive->media == ide_tape && !scsi && (stat & ERR_STAT))) {
43 if (drive->media == ide_floppy && !scsi)
44 printk(KERN_ERR "%s: DMA %s error\n",
45 drive->name, rq_data_dir(pc->rq)
46 ? "write" : "read");
47 pc->flags |= PC_FLAG_DMA_ERROR;
48 } else {
49 pc->xferred = pc->req_xfer;
50 if (update_buffers)
51 update_buffers(drive, pc);
52 }
53 debug_log("%s: DMA finished\n", drive->name);
54 }
55
56 /* No more interrupts */
57 if ((stat & DRQ_STAT) == 0) {
58 debug_log("Packet command completed, %d bytes transferred\n",
59 pc->xferred);
60
61 pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
62
63 local_irq_enable_in_hardirq();
64
65 if (drive->media == ide_tape && !scsi &&
66 (stat & ERR_STAT) && pc->c[0] == REQUEST_SENSE)
67 stat &= ~ERR_STAT;
68 if ((stat & ERR_STAT) || (pc->flags & PC_FLAG_DMA_ERROR)) {
69 /* Error detected */
70 debug_log("%s: I/O error\n", drive->name);
71
72 if (drive->media != ide_tape || scsi) {
73 pc->rq->errors++;
74 if (scsi)
75 goto cmd_finished;
76 }
77
78 if (pc->c[0] == REQUEST_SENSE) {
79 printk(KERN_ERR "%s: I/O error in request sense"
80 " command\n", drive->name);
81 return ide_do_reset(drive);
82 }
83
84 debug_log("[cmd %x]: check condition\n", pc->c[0]);
85
86 /* Retry operation */
87 retry_pc(drive);
88 /* queued, but not started */
89 return ide_stopped;
90 }
91cmd_finished:
92 pc->error = 0;
93 if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) &&
94 (stat & SEEK_STAT) == 0) {
95 dsc_handle(drive);
96 return ide_stopped;
97 }
98 /* Command finished - Call the callback function */
99 pc->callback(drive);
100 return ide_stopped;
101 }
102
103 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
104 pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
105 printk(KERN_ERR "%s: The device wants to issue more interrupts "
106 "in DMA mode\n", drive->name);
107 ide_dma_off(drive);
108 return ide_do_reset(drive);
109 }
110 /* Get the number of bytes to transfer on this interrupt. */
111 bcount = (hwif->INB(hwif->io_ports.lbah_addr) << 8) |
112 hwif->INB(hwif->io_ports.lbam_addr);
113
114 ireason = hwif->INB(hwif->io_ports.nsect_addr);
115
116 if (ireason & CD) {
117 printk(KERN_ERR "%s: CoD != 0 in %s\n", drive->name, __func__);
118 return ide_do_reset(drive);
119 }
120 if (((ireason & IO) == IO) == !!(pc->flags & PC_FLAG_WRITING)) {
121 /* Hopefully, we will never get here */
122 printk(KERN_ERR "%s: We wanted to %s, but the device wants us "
123 "to %s!\n", drive->name,
124 (ireason & IO) ? "Write" : "Read",
125 (ireason & IO) ? "Read" : "Write");
126 return ide_do_reset(drive);
127 }
128 if (!(pc->flags & PC_FLAG_WRITING)) {
129 /* Reading - Check that we have enough space */
130 temp = pc->xferred + bcount;
131 if (temp > pc->req_xfer) {
132 if (temp > pc->buf_size) {
133 printk(KERN_ERR "%s: The device wants to send "
134 "us more data than expected - "
135 "discarding data\n",
136 drive->name);
137 if (scsi)
138 temp = pc->buf_size - pc->xferred;
139 else
140 temp = 0;
141 if (temp) {
142 if (pc->sg)
143 io_buffers(drive, pc, temp, 0);
144 else
145 hwif->input_data(drive, NULL,
146 pc->cur_pos, temp);
147 printk(KERN_ERR "%s: transferred %d of "
148 "%d bytes\n",
149 drive->name,
150 temp, bcount);
151 }
152 pc->xferred += temp;
153 pc->cur_pos += temp;
154 ide_pad_transfer(drive, 0, bcount - temp);
155 ide_set_handler(drive, handler, timeout,
156 expiry);
157 return ide_started;
158 }
159 debug_log("The device wants to send us more data than "
160 "expected - allowing transfer\n");
161 }
162 xferfunc = hwif->input_data;
163 } else
164 xferfunc = hwif->output_data;
165
166 if ((drive->media == ide_floppy && !scsi && !pc->buf) ||
167 (drive->media == ide_tape && !scsi && pc->bh) ||
168 (scsi && pc->sg))
169 io_buffers(drive, pc, bcount, !!(pc->flags & PC_FLAG_WRITING));
170 else
171 xferfunc(drive, NULL, pc->cur_pos, bcount);
172
173 /* Update the current position */
174 pc->xferred += bcount;
175 pc->cur_pos += bcount;
176
177 debug_log("[cmd %x] transferred %d bytes on that intr.\n",
178 pc->c[0], bcount);
179
180 /* And set the interrupt handler again */
181 ide_set_handler(drive, handler, timeout, expiry);
182 return ide_started;
183}
184EXPORT_SYMBOL_GPL(ide_pc_intr);
185
186static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason)
187{
188 ide_hwif_t *hwif = drive->hwif;
189 int retries = 100;
190
191 while (retries-- && ((ireason & CD) == 0 || (ireason & IO))) {
192 printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing "
193 "a packet command, retrying\n", drive->name);
194 udelay(100);
195 ireason = hwif->INB(hwif->io_ports.nsect_addr);
196 if (retries == 0) {
197 printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing "
198 "a packet command, ignoring\n",
199 drive->name);
200 ireason |= CD;
201 ireason &= ~IO;
202 }
203 }
204
205 return ireason;
206}
207
208ide_startstop_t ide_transfer_pc(ide_drive_t *drive, struct ide_atapi_pc *pc,
209 ide_handler_t *handler, unsigned int timeout,
210 ide_expiry_t *expiry)
211{
212 ide_hwif_t *hwif = drive->hwif;
213 ide_startstop_t startstop;
214 u8 ireason;
215
216 if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) {
217 printk(KERN_ERR "%s: Strange, packet command initiated yet "
218 "DRQ isn't asserted\n", drive->name);
219 return startstop;
220 }
221
222 ireason = hwif->INB(hwif->io_ports.nsect_addr);
223 if (drive->media == ide_tape && !drive->scsi)
224 ireason = ide_wait_ireason(drive, ireason);
225
226 if ((ireason & CD) == 0 || (ireason & IO)) {
227 printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing "
228 "a packet command\n", drive->name);
229 return ide_do_reset(drive);
230 }
231
232 /* Set the interrupt routine */
233 ide_set_handler(drive, handler, timeout, expiry);
234
235 /* Begin DMA, if necessary */
236 if (pc->flags & PC_FLAG_DMA_OK) {
237 pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
238 hwif->dma_ops->dma_start(drive);
239 }
240
241 /* Send the actual packet */
242 if ((pc->flags & PC_FLAG_ZIP_DRIVE) == 0)
243 hwif->output_data(drive, NULL, pc->c, 12);
244
245 return ide_started;
246}
247EXPORT_SYMBOL_GPL(ide_transfer_pc);
248
249ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_atapi_pc *pc,
250 ide_handler_t *handler, unsigned int timeout,
251 ide_expiry_t *expiry)
252{
253 ide_hwif_t *hwif = drive->hwif;
254 u16 bcount;
255 u8 dma = 0;
256
257 /* We haven't transferred any data yet */
258 pc->xferred = 0;
259 pc->cur_pos = pc->buf;
260
261 /* Request to transfer the entire buffer at once */
262 if (drive->media == ide_tape && !drive->scsi)
263 bcount = pc->req_xfer;
264 else
265 bcount = min(pc->req_xfer, 63 * 1024);
266
267 if (pc->flags & PC_FLAG_DMA_ERROR) {
268 pc->flags &= ~PC_FLAG_DMA_ERROR;
269 ide_dma_off(drive);
270 }
271
272 if ((pc->flags & PC_FLAG_DMA_OK) && drive->using_dma) {
273 if (drive->scsi)
274 hwif->sg_mapped = 1;
275 dma = !hwif->dma_ops->dma_setup(drive);
276 if (drive->scsi)
277 hwif->sg_mapped = 0;
278 }
279
280 if (!dma)
281 pc->flags &= ~PC_FLAG_DMA_OK;
282
283 ide_pktcmd_tf_load(drive, drive->scsi ? 0 : IDE_TFLAG_OUT_DEVICE,
284 bcount, dma);
285
286 /* Issue the packet command */
287 if (pc->flags & PC_FLAG_DRQ_INTERRUPT) {
288 ide_execute_command(drive, WIN_PACKETCMD, handler,
289 timeout, NULL);
290 return ide_started;
291 } else {
292 ide_execute_pkt_cmd(drive);
293 return (*handler)(drive);
294 }
295}
296EXPORT_SYMBOL_GPL(ide_issue_pc);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 68e7f19dc036..d99847157186 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -188,16 +188,6 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
188 ide_cd_log_error(drive->name, failed_command, sense); 188 ide_cd_log_error(drive->name, failed_command, sense);
189} 189}
190 190
191/* Initialize a ide-cd packet command request */
192void ide_cd_init_rq(ide_drive_t *drive, struct request *rq)
193{
194 struct cdrom_info *cd = drive->driver_data;
195
196 ide_init_drive_cmd(rq);
197 rq->cmd_type = REQ_TYPE_ATA_PC;
198 rq->rq_disk = cd->disk;
199}
200
201static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, 191static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
202 struct request *failed_command) 192 struct request *failed_command)
203{ 193{
@@ -208,7 +198,9 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
208 sense = &info->sense_data; 198 sense = &info->sense_data;
209 199
210 /* stuff the sense request in front of our current request */ 200 /* stuff the sense request in front of our current request */
211 ide_cd_init_rq(drive, rq); 201 blk_rq_init(NULL, rq);
202 rq->cmd_type = REQ_TYPE_ATA_PC;
203 rq->rq_disk = info->disk;
212 204
213 rq->data = sense; 205 rq->data = sense;
214 rq->cmd[0] = GPCMD_REQUEST_SENSE; 206 rq->cmd[0] = GPCMD_REQUEST_SENSE;
@@ -216,11 +208,12 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
216 rq->data_len = 18; 208 rq->data_len = 18;
217 209
218 rq->cmd_type = REQ_TYPE_SENSE; 210 rq->cmd_type = REQ_TYPE_SENSE;
211 rq->cmd_flags |= REQ_PREEMPT;
219 212
220 /* NOTE! Save the failed command in "rq->buffer" */ 213 /* NOTE! Save the failed command in "rq->buffer" */
221 rq->buffer = (void *) failed_command; 214 rq->buffer = (void *) failed_command;
222 215
223 (void) ide_do_drive_cmd(drive, rq, ide_preempt); 216 ide_do_drive_cmd(drive, rq);
224} 217}
225 218
226static void cdrom_end_request(ide_drive_t *drive, int uptodate) 219static void cdrom_end_request(ide_drive_t *drive, int uptodate)
@@ -537,8 +530,8 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
537 info->dma = !hwif->dma_ops->dma_setup(drive); 530 info->dma = !hwif->dma_ops->dma_setup(drive);
538 531
539 /* set up the controller registers */ 532 /* set up the controller registers */
540 ide_pktcmd_tf_load(drive, IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL | 533 ide_pktcmd_tf_load(drive, IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL,
541 IDE_TFLAG_NO_SELECT_MASK, xferlen, info->dma); 534 xferlen, info->dma);
542 535
543 if (info->cd_flags & IDE_CD_FLAG_DRQ_INTERRUPT) { 536 if (info->cd_flags & IDE_CD_FLAG_DRQ_INTERRUPT) {
544 /* waiting for CDB interrupt, not DMA yet. */ 537 /* waiting for CDB interrupt, not DMA yet. */
@@ -838,34 +831,54 @@ static void ide_cd_request_sense_fixup(struct request *rq)
838 } 831 }
839} 832}
840 833
841int ide_cd_queue_pc(ide_drive_t *drive, struct request *rq) 834int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
835 int write, void *buffer, unsigned *bufflen,
836 struct request_sense *sense, int timeout,
837 unsigned int cmd_flags)
842{ 838{
843 struct request_sense sense; 839 struct cdrom_info *info = drive->driver_data;
840 struct request_sense local_sense;
844 int retries = 10; 841 int retries = 10;
845 unsigned int flags = rq->cmd_flags; 842 unsigned int flags = 0;
846 843
847 if (rq->sense == NULL) 844 if (!sense)
848 rq->sense = &sense; 845 sense = &local_sense;
849 846
850 /* start of retry loop */ 847 /* start of retry loop */
851 do { 848 do {
849 struct request *rq;
852 int error; 850 int error;
853 unsigned long time = jiffies;
854 rq->cmd_flags = flags;
855 851
856 error = ide_do_drive_cmd(drive, rq, ide_wait); 852 rq = blk_get_request(drive->queue, write, __GFP_WAIT);
857 time = jiffies - time; 853
854 memcpy(rq->cmd, cmd, BLK_MAX_CDB);
855 rq->cmd_type = REQ_TYPE_ATA_PC;
856 rq->sense = sense;
857 rq->cmd_flags |= cmd_flags;
858 rq->timeout = timeout;
859 if (buffer) {
860 rq->data = buffer;
861 rq->data_len = *bufflen;
862 }
863
864 error = blk_execute_rq(drive->queue, info->disk, rq, 0);
865
866 if (buffer)
867 *bufflen = rq->data_len;
868
869 flags = rq->cmd_flags;
870 blk_put_request(rq);
858 871
859 /* 872 /*
860 * FIXME: we should probably abort/retry or something in case of 873 * FIXME: we should probably abort/retry or something in case of
861 * failure. 874 * failure.
862 */ 875 */
863 if (rq->cmd_flags & REQ_FAILED) { 876 if (flags & REQ_FAILED) {
864 /* 877 /*
865 * The request failed. Retry if it was due to a unit 878 * The request failed. Retry if it was due to a unit
866 * attention status (usually means media was changed). 879 * attention status (usually means media was changed).
867 */ 880 */
868 struct request_sense *reqbuf = rq->sense; 881 struct request_sense *reqbuf = sense;
869 882
870 if (reqbuf->sense_key == UNIT_ATTENTION) 883 if (reqbuf->sense_key == UNIT_ATTENTION)
871 cdrom_saw_media_change(drive); 884 cdrom_saw_media_change(drive);
@@ -885,10 +898,10 @@ int ide_cd_queue_pc(ide_drive_t *drive, struct request *rq)
885 } 898 }
886 899
887 /* end of retry loop */ 900 /* end of retry loop */
888 } while ((rq->cmd_flags & REQ_FAILED) && retries >= 0); 901 } while ((flags & REQ_FAILED) && retries >= 0);
889 902
890 /* return an error if the command failed */ 903 /* return an error if the command failed */
891 return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0; 904 return (flags & REQ_FAILED) ? -EIO : 0;
892} 905}
893 906
894/* 907/*
@@ -1268,23 +1281,20 @@ static void msf_from_bcd(struct atapi_msf *msf)
1268 1281
1269int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense) 1282int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
1270{ 1283{
1271 struct request req;
1272 struct cdrom_info *info = drive->driver_data; 1284 struct cdrom_info *info = drive->driver_data;
1273 struct cdrom_device_info *cdi = &info->devinfo; 1285 struct cdrom_device_info *cdi = &info->devinfo;
1286 unsigned char cmd[BLK_MAX_CDB];
1274 1287
1275 ide_cd_init_rq(drive, &req); 1288 memset(cmd, 0, BLK_MAX_CDB);
1276 1289 cmd[0] = GPCMD_TEST_UNIT_READY;
1277 req.sense = sense;
1278 req.cmd[0] = GPCMD_TEST_UNIT_READY;
1279 req.cmd_flags |= REQ_QUIET;
1280 1290
1281 /* 1291 /*
1282 * Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to switch CDs 1292 * Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to switch CDs
1283 * instead of supporting the LOAD_UNLOAD opcode. 1293 * instead of supporting the LOAD_UNLOAD opcode.
1284 */ 1294 */
1285 req.cmd[7] = cdi->sanyo_slot % 3; 1295 cmd[7] = cdi->sanyo_slot % 3;
1286 1296
1287 return ide_cd_queue_pc(drive, &req); 1297 return ide_cd_queue_pc(drive, cmd, 0, NULL, 0, sense, 0, REQ_QUIET);
1288} 1298}
1289 1299
1290static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, 1300static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
@@ -1297,17 +1307,14 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
1297 } capbuf; 1307 } capbuf;
1298 1308
1299 int stat; 1309 int stat;
1300 struct request req; 1310 unsigned char cmd[BLK_MAX_CDB];
1301 1311 unsigned len = sizeof(capbuf);
1302 ide_cd_init_rq(drive, &req);
1303 1312
1304 req.sense = sense; 1313 memset(cmd, 0, BLK_MAX_CDB);
1305 req.cmd[0] = GPCMD_READ_CDVD_CAPACITY; 1314 cmd[0] = GPCMD_READ_CDVD_CAPACITY;
1306 req.data = (char *)&capbuf;
1307 req.data_len = sizeof(capbuf);
1308 req.cmd_flags |= REQ_QUIET;
1309 1315
1310 stat = ide_cd_queue_pc(drive, &req); 1316 stat = ide_cd_queue_pc(drive, cmd, 0, &capbuf, &len, sense, 0,
1317 REQ_QUIET);
1311 if (stat == 0) { 1318 if (stat == 0) {
1312 *capacity = 1 + be32_to_cpu(capbuf.lba); 1319 *capacity = 1 + be32_to_cpu(capbuf.lba);
1313 *sectors_per_frame = 1320 *sectors_per_frame =
@@ -1321,24 +1328,20 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
1321 int format, char *buf, int buflen, 1328 int format, char *buf, int buflen,
1322 struct request_sense *sense) 1329 struct request_sense *sense)
1323{ 1330{
1324 struct request req; 1331 unsigned char cmd[BLK_MAX_CDB];
1325 1332
1326 ide_cd_init_rq(drive, &req); 1333 memset(cmd, 0, BLK_MAX_CDB);
1327 1334
1328 req.sense = sense; 1335 cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
1329 req.data = buf; 1336 cmd[6] = trackno;
1330 req.data_len = buflen; 1337 cmd[7] = (buflen >> 8);
1331 req.cmd_flags |= REQ_QUIET; 1338 cmd[8] = (buflen & 0xff);
1332 req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; 1339 cmd[9] = (format << 6);
1333 req.cmd[6] = trackno;
1334 req.cmd[7] = (buflen >> 8);
1335 req.cmd[8] = (buflen & 0xff);
1336 req.cmd[9] = (format << 6);
1337 1340
1338 if (msf_flag) 1341 if (msf_flag)
1339 req.cmd[1] = 2; 1342 cmd[1] = 2;
1340 1343
1341 return ide_cd_queue_pc(drive, &req); 1344 return ide_cd_queue_pc(drive, cmd, 0, buf, &buflen, sense, 0, REQ_QUIET);
1342} 1345}
1343 1346
1344/* Try to read the entire TOC for the disk into our internal buffer. */ 1347/* Try to read the entire TOC for the disk into our internal buffer. */
@@ -2103,11 +2106,6 @@ static int ide_cd_probe(ide_drive_t *drive)
2103 goto failed; 2106 goto failed;
2104 } 2107 }
2105 } 2108 }
2106 if (drive->scsi) {
2107 printk(KERN_INFO "ide-cd: passing drive %s to ide-scsi "
2108 "emulation.\n", drive->name);
2109 goto failed;
2110 }
2111 info = kzalloc(sizeof(struct cdrom_info), GFP_KERNEL); 2109 info = kzalloc(sizeof(struct cdrom_info), GFP_KERNEL);
2112 if (info == NULL) { 2110 if (info == NULL) {
2113 printk(KERN_ERR "%s: Can't allocate a cdrom structure\n", 2111 printk(KERN_ERR "%s: Can't allocate a cdrom structure\n",
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index a58801c4484d..fe0ea36e4124 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -143,8 +143,8 @@ struct cdrom_info {
143void ide_cd_log_error(const char *, struct request *, struct request_sense *); 143void ide_cd_log_error(const char *, struct request *, struct request_sense *);
144 144
145/* ide-cd.c functions used by ide-cd_ioctl.c */ 145/* ide-cd.c functions used by ide-cd_ioctl.c */
146void ide_cd_init_rq(ide_drive_t *, struct request *); 146int ide_cd_queue_pc(ide_drive_t *, const unsigned char *, int, void *,
147int ide_cd_queue_pc(ide_drive_t *, struct request *); 147 unsigned *, struct request_sense *, int, unsigned int);
148int ide_cd_read_toc(ide_drive_t *, struct request_sense *); 148int ide_cd_read_toc(ide_drive_t *, struct request_sense *);
149int ide_cdrom_get_capabilities(ide_drive_t *, u8 *); 149int ide_cdrom_get_capabilities(ide_drive_t *, u8 *);
150void ide_cdrom_update_speed(ide_drive_t *, u8 *); 150void ide_cdrom_update_speed(ide_drive_t *, u8 *);
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 6d147ce6782f..24d002addf73 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -104,8 +104,8 @@ int cdrom_eject(ide_drive_t *drive, int ejectflag,
104{ 104{
105 struct cdrom_info *cd = drive->driver_data; 105 struct cdrom_info *cd = drive->driver_data;
106 struct cdrom_device_info *cdi = &cd->devinfo; 106 struct cdrom_device_info *cdi = &cd->devinfo;
107 struct request req;
108 char loej = 0x02; 107 char loej = 0x02;
108 unsigned char cmd[BLK_MAX_CDB];
109 109
110 if ((cd->cd_flags & IDE_CD_FLAG_NO_EJECT) && !ejectflag) 110 if ((cd->cd_flags & IDE_CD_FLAG_NO_EJECT) && !ejectflag)
111 return -EDRIVE_CANT_DO_THIS; 111 return -EDRIVE_CANT_DO_THIS;
@@ -114,17 +114,16 @@ int cdrom_eject(ide_drive_t *drive, int ejectflag,
114 if ((cd->cd_flags & IDE_CD_FLAG_DOOR_LOCKED) && ejectflag) 114 if ((cd->cd_flags & IDE_CD_FLAG_DOOR_LOCKED) && ejectflag)
115 return 0; 115 return 0;
116 116
117 ide_cd_init_rq(drive, &req);
118
119 /* only tell drive to close tray if open, if it can do that */ 117 /* only tell drive to close tray if open, if it can do that */
120 if (ejectflag && (cdi->mask & CDC_CLOSE_TRAY)) 118 if (ejectflag && (cdi->mask & CDC_CLOSE_TRAY))
121 loej = 0; 119 loej = 0;
122 120
123 req.sense = sense; 121 memset(cmd, 0, BLK_MAX_CDB);
124 req.cmd[0] = GPCMD_START_STOP_UNIT; 122
125 req.cmd[4] = loej | (ejectflag != 0); 123 cmd[0] = GPCMD_START_STOP_UNIT;
124 cmd[4] = loej | (ejectflag != 0);
126 125
127 return ide_cd_queue_pc(drive, &req); 126 return ide_cd_queue_pc(drive, cmd, 0, NULL, 0, sense, 0, 0);
128} 127}
129 128
130/* Lock the door if LOCKFLAG is nonzero; unlock it otherwise. */ 129/* Lock the door if LOCKFLAG is nonzero; unlock it otherwise. */
@@ -134,7 +133,6 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
134{ 133{
135 struct cdrom_info *cd = drive->driver_data; 134 struct cdrom_info *cd = drive->driver_data;
136 struct request_sense my_sense; 135 struct request_sense my_sense;
137 struct request req;
138 int stat; 136 int stat;
139 137
140 if (sense == NULL) 138 if (sense == NULL)
@@ -144,11 +142,15 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag,
144 if (cd->cd_flags & IDE_CD_FLAG_NO_DOORLOCK) { 142 if (cd->cd_flags & IDE_CD_FLAG_NO_DOORLOCK) {
145 stat = 0; 143 stat = 0;
146 } else { 144 } else {
147 ide_cd_init_rq(drive, &req); 145 unsigned char cmd[BLK_MAX_CDB];
148 req.sense = sense; 146
149 req.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL; 147 memset(cmd, 0, BLK_MAX_CDB);
150 req.cmd[4] = lockflag ? 1 : 0; 148
151 stat = ide_cd_queue_pc(drive, &req); 149 cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
150 cmd[4] = lockflag ? 1 : 0;
151
152 stat = ide_cd_queue_pc(drive, cmd, 0, NULL, 0,
153 sense, 0, 0);
152 } 154 }
153 155
154 /* If we got an illegal field error, the drive 156 /* If we got an illegal field error, the drive
@@ -206,32 +208,30 @@ int ide_cdrom_select_speed(struct cdrom_device_info *cdi, int speed)
206{ 208{
207 ide_drive_t *drive = cdi->handle; 209 ide_drive_t *drive = cdi->handle;
208 struct cdrom_info *cd = drive->driver_data; 210 struct cdrom_info *cd = drive->driver_data;
209 struct request rq;
210 struct request_sense sense; 211 struct request_sense sense;
211 u8 buf[ATAPI_CAPABILITIES_PAGE_SIZE]; 212 u8 buf[ATAPI_CAPABILITIES_PAGE_SIZE];
212 int stat; 213 int stat;
213 214 unsigned char cmd[BLK_MAX_CDB];
214 ide_cd_init_rq(drive, &rq);
215
216 rq.sense = &sense;
217 215
218 if (speed == 0) 216 if (speed == 0)
219 speed = 0xffff; /* set to max */ 217 speed = 0xffff; /* set to max */
220 else 218 else
221 speed *= 177; /* Nx to kbytes/s */ 219 speed *= 177; /* Nx to kbytes/s */
222 220
223 rq.cmd[0] = GPCMD_SET_SPEED; 221 memset(cmd, 0, BLK_MAX_CDB);
222
223 cmd[0] = GPCMD_SET_SPEED;
224 /* Read Drive speed in kbytes/second MSB/LSB */ 224 /* Read Drive speed in kbytes/second MSB/LSB */
225 rq.cmd[2] = (speed >> 8) & 0xff; 225 cmd[2] = (speed >> 8) & 0xff;
226 rq.cmd[3] = speed & 0xff; 226 cmd[3] = speed & 0xff;
227 if ((cdi->mask & (CDC_CD_R | CDC_CD_RW | CDC_DVD_R)) != 227 if ((cdi->mask & (CDC_CD_R | CDC_CD_RW | CDC_DVD_R)) !=
228 (CDC_CD_R | CDC_CD_RW | CDC_DVD_R)) { 228 (CDC_CD_R | CDC_CD_RW | CDC_DVD_R)) {
229 /* Write Drive speed in kbytes/second MSB/LSB */ 229 /* Write Drive speed in kbytes/second MSB/LSB */
230 rq.cmd[4] = (speed >> 8) & 0xff; 230 cmd[4] = (speed >> 8) & 0xff;
231 rq.cmd[5] = speed & 0xff; 231 cmd[5] = speed & 0xff;
232 } 232 }
233 233
234 stat = ide_cd_queue_pc(drive, &rq); 234 stat = ide_cd_queue_pc(drive, cmd, 0, NULL, 0, &sense, 0, 0);
235 235
236 if (!ide_cdrom_get_capabilities(drive, buf)) { 236 if (!ide_cdrom_get_capabilities(drive, buf)) {
237 ide_cdrom_update_speed(drive, buf); 237 ide_cdrom_update_speed(drive, buf);
@@ -268,21 +268,19 @@ int ide_cdrom_get_mcn(struct cdrom_device_info *cdi,
268{ 268{
269 ide_drive_t *drive = cdi->handle; 269 ide_drive_t *drive = cdi->handle;
270 int stat, mcnlen; 270 int stat, mcnlen;
271 struct request rq;
272 char buf[24]; 271 char buf[24];
272 unsigned char cmd[BLK_MAX_CDB];
273 unsigned len = sizeof(buf);
273 274
274 ide_cd_init_rq(drive, &rq); 275 memset(cmd, 0, BLK_MAX_CDB);
275 276
276 rq.data = buf; 277 cmd[0] = GPCMD_READ_SUBCHANNEL;
277 rq.data_len = sizeof(buf); 278 cmd[1] = 2; /* MSF addressing */
279 cmd[2] = 0x40; /* request subQ data */
280 cmd[3] = 2; /* format */
281 cmd[8] = len;
278 282
279 rq.cmd[0] = GPCMD_READ_SUBCHANNEL; 283 stat = ide_cd_queue_pc(drive, cmd, 0, buf, &len, NULL, 0, 0);
280 rq.cmd[1] = 2; /* MSF addressing */
281 rq.cmd[2] = 0x40; /* request subQ data */
282 rq.cmd[3] = 2; /* format */
283 rq.cmd[8] = sizeof(buf);
284
285 stat = ide_cd_queue_pc(drive, &rq);
286 if (stat) 284 if (stat)
287 return stat; 285 return stat;
288 286
@@ -298,14 +296,14 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
298 ide_drive_t *drive = cdi->handle; 296 ide_drive_t *drive = cdi->handle;
299 struct cdrom_info *cd = drive->driver_data; 297 struct cdrom_info *cd = drive->driver_data;
300 struct request_sense sense; 298 struct request_sense sense;
301 struct request req; 299 struct request *rq;
302 int ret; 300 int ret;
303 301
304 ide_cd_init_rq(drive, &req); 302 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
305 req.cmd_type = REQ_TYPE_SPECIAL; 303 rq->cmd_type = REQ_TYPE_SPECIAL;
306 req.cmd_flags = REQ_QUIET; 304 rq->cmd_flags = REQ_QUIET;
307 ret = ide_do_drive_cmd(drive, &req, ide_wait); 305 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
308 306 blk_put_request(rq);
309 /* 307 /*
310 * A reset will unlock the door. If it was previously locked, 308 * A reset will unlock the door. If it was previously locked,
311 * lock it again. 309 * lock it again.
@@ -351,8 +349,8 @@ static int ide_cd_fake_play_trkind(ide_drive_t *drive, void *arg)
351 struct atapi_toc_entry *first_toc, *last_toc; 349 struct atapi_toc_entry *first_toc, *last_toc;
352 unsigned long lba_start, lba_end; 350 unsigned long lba_start, lba_end;
353 int stat; 351 int stat;
354 struct request rq;
355 struct request_sense sense; 352 struct request_sense sense;
353 unsigned char cmd[BLK_MAX_CDB];
356 354
357 stat = ide_cd_get_toc_entry(drive, ti->cdti_trk0, &first_toc); 355 stat = ide_cd_get_toc_entry(drive, ti->cdti_trk0, &first_toc);
358 if (stat) 356 if (stat)
@@ -370,14 +368,13 @@ static int ide_cd_fake_play_trkind(ide_drive_t *drive, void *arg)
370 if (lba_end <= lba_start) 368 if (lba_end <= lba_start)
371 return -EINVAL; 369 return -EINVAL;
372 370
373 ide_cd_init_rq(drive, &rq); 371 memset(cmd, 0, BLK_MAX_CDB);
374 372
375 rq.sense = &sense; 373 cmd[0] = GPCMD_PLAY_AUDIO_MSF;
376 rq.cmd[0] = GPCMD_PLAY_AUDIO_MSF; 374 lba_to_msf(lba_start, &cmd[3], &cmd[4], &cmd[5]);
377 lba_to_msf(lba_start, &rq.cmd[3], &rq.cmd[4], &rq.cmd[5]); 375 lba_to_msf(lba_end - 1, &cmd[6], &cmd[7], &cmd[8]);
378 lba_to_msf(lba_end - 1, &rq.cmd[6], &rq.cmd[7], &rq.cmd[8]);
379 376
380 return ide_cd_queue_pc(drive, &rq); 377 return ide_cd_queue_pc(drive, cmd, 0, NULL, 0, &sense, 0, 0);
381} 378}
382 379
383static int ide_cd_read_tochdr(ide_drive_t *drive, void *arg) 380static int ide_cd_read_tochdr(ide_drive_t *drive, void *arg)
@@ -447,8 +444,9 @@ int ide_cdrom_audio_ioctl(struct cdrom_device_info *cdi,
447int ide_cdrom_packet(struct cdrom_device_info *cdi, 444int ide_cdrom_packet(struct cdrom_device_info *cdi,
448 struct packet_command *cgc) 445 struct packet_command *cgc)
449{ 446{
450 struct request req;
451 ide_drive_t *drive = cdi->handle; 447 ide_drive_t *drive = cdi->handle;
448 unsigned int flags = 0;
449 unsigned len = cgc->buflen;
452 450
453 if (cgc->timeout <= 0) 451 if (cgc->timeout <= 0)
454 cgc->timeout = ATAPI_WAIT_PC; 452 cgc->timeout = ATAPI_WAIT_PC;
@@ -456,24 +454,21 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
456 /* here we queue the commands from the uniform CD-ROM 454 /* here we queue the commands from the uniform CD-ROM
457 layer. the packet must be complete, as we do not 455 layer. the packet must be complete, as we do not
458 touch it at all. */ 456 touch it at all. */
459 ide_cd_init_rq(drive, &req);
460 457
461 if (cgc->data_direction == CGC_DATA_WRITE) 458 if (cgc->data_direction == CGC_DATA_WRITE)
462 req.cmd_flags |= REQ_RW; 459 flags |= REQ_RW;
463 460
464 memcpy(req.cmd, cgc->cmd, CDROM_PACKET_SIZE);
465 if (cgc->sense) 461 if (cgc->sense)
466 memset(cgc->sense, 0, sizeof(struct request_sense)); 462 memset(cgc->sense, 0, sizeof(struct request_sense));
467 req.data = cgc->buffer;
468 req.data_len = cgc->buflen;
469 req.timeout = cgc->timeout;
470 463
471 if (cgc->quiet) 464 if (cgc->quiet)
472 req.cmd_flags |= REQ_QUIET; 465 flags |= REQ_QUIET;
473 466
474 req.sense = cgc->sense; 467 cgc->stat = ide_cd_queue_pc(drive, cgc->cmd,
475 cgc->stat = ide_cd_queue_pc(drive, &req); 468 cgc->data_direction == CGC_DATA_WRITE,
469 cgc->buffer, &len,
470 cgc->sense, cgc->timeout, flags);
476 if (!cgc->stat) 471 if (!cgc->stat)
477 cgc->buflen -= req.data_len; 472 cgc->buflen -= len;
478 return cgc->stat; 473 return cgc->stat;
479} 474}
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 8e08d083fce9..5f49a4ae9dd8 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -198,8 +198,7 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
198 } 198 }
199 199
200 memset(&task, 0, sizeof(task)); 200 memset(&task, 0, sizeof(task));
201 task.tf_flags = IDE_TFLAG_NO_SELECT_MASK; /* FIXME? */ 201 task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
202 task.tf_flags |= (IDE_TFLAG_TF | IDE_TFLAG_DEVICE);
203 202
204 if (drive->select.b.lba) { 203 if (drive->select.b.lba) {
205 if (lba48) { 204 if (lba48) {
@@ -617,7 +616,8 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
617 */ 616 */
618static int set_multcount(ide_drive_t *drive, int arg) 617static int set_multcount(ide_drive_t *drive, int arg)
619{ 618{
620 struct request rq; 619 struct request *rq;
620 int error;
621 621
622 if (arg < 0 || arg > drive->id->max_multsect) 622 if (arg < 0 || arg > drive->id->max_multsect)
623 return -EINVAL; 623 return -EINVAL;
@@ -625,12 +625,13 @@ static int set_multcount(ide_drive_t *drive, int arg)
625 if (drive->special.b.set_multmode) 625 if (drive->special.b.set_multmode)
626 return -EBUSY; 626 return -EBUSY;
627 627
628 ide_init_drive_cmd(&rq); 628 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
629 rq.cmd_type = REQ_TYPE_ATA_TASKFILE; 629 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
630 630
631 drive->mult_req = arg; 631 drive->mult_req = arg;
632 drive->special.b.set_multmode = 1; 632 drive->special.b.set_multmode = 1;
633 (void)ide_do_drive_cmd(drive, &rq, ide_wait); 633 error = blk_execute_rq(drive->queue, NULL, rq, 0);
634 blk_put_request(rq);
634 635
635 return (drive->mult_count == arg) ? 0 : -EIO; 636 return (drive->mult_count == arg) ? 0 : -EIO;
636} 637}
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 653b1ade13d3..7ee44f86bc54 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -463,7 +463,7 @@ int ide_dma_setup(ide_drive_t *drive)
463 } 463 }
464 464
465 /* PRD table */ 465 /* PRD table */
466 if (hwif->mmio) 466 if (hwif->host_flags & IDE_HFLAG_MMIO)
467 writel(hwif->dmatable_dma, 467 writel(hwif->dmatable_dma,
468 (void __iomem *)(hwif->dma_base + ATA_DMA_TABLE_OFS)); 468 (void __iomem *)(hwif->dma_base + ATA_DMA_TABLE_OFS));
469 else 469 else
@@ -692,7 +692,7 @@ static int ide_tune_dma(ide_drive_t *drive)
692 ide_hwif_t *hwif = drive->hwif; 692 ide_hwif_t *hwif = drive->hwif;
693 u8 speed; 693 u8 speed;
694 694
695 if (noautodma || drive->nodma || (drive->id->capability & 1) == 0) 695 if (drive->nodma || (drive->id->capability & 1) == 0)
696 return 0; 696 return 0;
697 697
698 /* consult the list of known "bad" drives */ 698 /* consult the list of known "bad" drives */
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index f05fbc2bd7a8..b3689437269f 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -286,11 +286,12 @@ static void idefloppy_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc,
286{ 286{
287 struct ide_floppy_obj *floppy = drive->driver_data; 287 struct ide_floppy_obj *floppy = drive->driver_data;
288 288
289 ide_init_drive_cmd(rq); 289 blk_rq_init(NULL, rq);
290 rq->buffer = (char *) pc; 290 rq->buffer = (char *) pc;
291 rq->cmd_type = REQ_TYPE_SPECIAL; 291 rq->cmd_type = REQ_TYPE_SPECIAL;
292 rq->cmd_flags |= REQ_PREEMPT;
292 rq->rq_disk = floppy->disk; 293 rq->rq_disk = floppy->disk;
293 (void) ide_do_drive_cmd(drive, rq, ide_preempt); 294 ide_do_drive_cmd(drive, rq);
294} 295}
295 296
296static struct ide_atapi_pc *idefloppy_next_pc_storage(ide_drive_t *drive) 297static struct ide_atapi_pc *idefloppy_next_pc_storage(ide_drive_t *drive)
@@ -311,50 +312,41 @@ static struct request *idefloppy_next_rq_storage(ide_drive_t *drive)
311 return (&floppy->rq_stack[floppy->rq_stack_index++]); 312 return (&floppy->rq_stack[floppy->rq_stack_index++]);
312} 313}
313 314
314static void idefloppy_request_sense_callback(ide_drive_t *drive) 315static void ide_floppy_callback(ide_drive_t *drive)
315{ 316{
316 idefloppy_floppy_t *floppy = drive->driver_data; 317 idefloppy_floppy_t *floppy = drive->driver_data;
317 u8 *buf = floppy->pc->buf; 318 struct ide_atapi_pc *pc = floppy->pc;
319 int uptodate = pc->error ? 0 : 1;
318 320
319 debug_log("Reached %s\n", __func__); 321 debug_log("Reached %s\n", __func__);
320 322
321 if (!floppy->pc->error) { 323 if (floppy->failed_pc == pc)
322 floppy->sense_key = buf[2] & 0x0F; 324 floppy->failed_pc = NULL;
323 floppy->asc = buf[12];
324 floppy->ascq = buf[13];
325 floppy->progress_indication = buf[15] & 0x80 ?
326 (u16)get_unaligned((u16 *)&buf[16]) : 0x10000;
327 325
328 if (floppy->failed_pc) 326 if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
329 debug_log("pc = %x, sense key = %x, asc = %x," 327 (pc->rq && blk_pc_request(pc->rq)))
330 " ascq = %x\n", 328 uptodate = 1; /* FIXME */
331 floppy->failed_pc->c[0], 329 else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
332 floppy->sense_key, 330 u8 *buf = floppy->pc->buf;
333 floppy->asc,
334 floppy->ascq);
335 else
336 debug_log("sense key = %x, asc = %x, ascq = %x\n",
337 floppy->sense_key,
338 floppy->asc,
339 floppy->ascq);
340 331
332 if (!pc->error) {
333 floppy->sense_key = buf[2] & 0x0F;
334 floppy->asc = buf[12];
335 floppy->ascq = buf[13];
336 floppy->progress_indication = buf[15] & 0x80 ?
337 (u16)get_unaligned((u16 *)&buf[16]) : 0x10000;
341 338
342 idefloppy_end_request(drive, 1, 0); 339 if (floppy->failed_pc)
343 } else { 340 debug_log("pc = %x, ", floppy->failed_pc->c[0]);
344 printk(KERN_ERR "Error in REQUEST SENSE itself - Aborting"
345 " request!\n");
346 idefloppy_end_request(drive, 0, 0);
347 }
348}
349 341
350/* General packet command callback function. */ 342 debug_log("sense key = %x, asc = %x, ascq = %x\n",
351static void idefloppy_pc_callback(ide_drive_t *drive) 343 floppy->sense_key, floppy->asc, floppy->ascq);
352{ 344 } else
353 idefloppy_floppy_t *floppy = drive->driver_data; 345 printk(KERN_ERR "Error in REQUEST SENSE itself - "
354 346 "Aborting request!\n");
355 debug_log("Reached %s\n", __func__); 347 }
356 348
357 idefloppy_end_request(drive, floppy->pc->error ? 0 : 1, 0); 349 idefloppy_end_request(drive, uptodate, 0);
358} 350}
359 351
360static void idefloppy_init_pc(struct ide_atapi_pc *pc) 352static void idefloppy_init_pc(struct ide_atapi_pc *pc)
@@ -365,7 +357,7 @@ static void idefloppy_init_pc(struct ide_atapi_pc *pc)
365 pc->req_xfer = 0; 357 pc->req_xfer = 0;
366 pc->buf = pc->pc_buf; 358 pc->buf = pc->pc_buf;
367 pc->buf_size = IDEFLOPPY_PC_BUFFER_SIZE; 359 pc->buf_size = IDEFLOPPY_PC_BUFFER_SIZE;
368 pc->idefloppy_callback = &idefloppy_pc_callback; 360 pc->callback = ide_floppy_callback;
369} 361}
370 362
371static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc) 363static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc)
@@ -374,7 +366,6 @@ static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc)
374 pc->c[0] = GPCMD_REQUEST_SENSE; 366 pc->c[0] = GPCMD_REQUEST_SENSE;
375 pc->c[4] = 255; 367 pc->c[4] = 255;
376 pc->req_xfer = 18; 368 pc->req_xfer = 18;
377 pc->idefloppy_callback = &idefloppy_request_sense_callback;
378} 369}
379 370
380/* 371/*
@@ -397,174 +388,19 @@ static void idefloppy_retry_pc(ide_drive_t *drive)
397static ide_startstop_t idefloppy_pc_intr(ide_drive_t *drive) 388static ide_startstop_t idefloppy_pc_intr(ide_drive_t *drive)
398{ 389{
399 idefloppy_floppy_t *floppy = drive->driver_data; 390 idefloppy_floppy_t *floppy = drive->driver_data;
400 ide_hwif_t *hwif = drive->hwif;
401 struct ide_atapi_pc *pc = floppy->pc;
402 struct request *rq = pc->rq;
403 xfer_func_t *xferfunc;
404 unsigned int temp;
405 int dma_error = 0;
406 u16 bcount;
407 u8 stat, ireason;
408
409 debug_log("Reached %s interrupt handler\n", __func__);
410
411 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
412 dma_error = hwif->dma_ops->dma_end(drive);
413 if (dma_error) {
414 printk(KERN_ERR "%s: DMA %s error\n", drive->name,
415 rq_data_dir(rq) ? "write" : "read");
416 pc->flags |= PC_FLAG_DMA_ERROR;
417 } else {
418 pc->xferred = pc->req_xfer;
419 idefloppy_update_buffers(drive, pc);
420 }
421 debug_log("DMA finished\n");
422 }
423
424 /* Clear the interrupt */
425 stat = ide_read_status(drive);
426
427 /* No more interrupts */
428 if ((stat & DRQ_STAT) == 0) {
429 debug_log("Packet command completed, %d bytes transferred\n",
430 pc->xferred);
431 pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
432
433 local_irq_enable_in_hardirq();
434
435 if ((stat & ERR_STAT) || (pc->flags & PC_FLAG_DMA_ERROR)) {
436 /* Error detected */
437 debug_log("%s: I/O error\n", drive->name);
438 rq->errors++;
439 if (pc->c[0] == GPCMD_REQUEST_SENSE) {
440 printk(KERN_ERR "ide-floppy: I/O error in "
441 "request sense command\n");
442 return ide_do_reset(drive);
443 }
444 /* Retry operation */
445 idefloppy_retry_pc(drive);
446 /* queued, but not started */
447 return ide_stopped;
448 }
449 pc->error = 0;
450 if (floppy->failed_pc == pc)
451 floppy->failed_pc = NULL;
452 /* Command finished - Call the callback function */
453 pc->idefloppy_callback(drive);
454 return ide_stopped;
455 }
456
457 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
458 pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
459 printk(KERN_ERR "ide-floppy: The floppy wants to issue "
460 "more interrupts in DMA mode\n");
461 ide_dma_off(drive);
462 return ide_do_reset(drive);
463 }
464
465 /* Get the number of bytes to transfer */
466 bcount = (hwif->INB(hwif->io_ports.lbah_addr) << 8) |
467 hwif->INB(hwif->io_ports.lbam_addr);
468 /* on this interrupt */
469 ireason = hwif->INB(hwif->io_ports.nsect_addr);
470
471 if (ireason & CD) {
472 printk(KERN_ERR "ide-floppy: CoD != 0 in %s\n", __func__);
473 return ide_do_reset(drive);
474 }
475 if (((ireason & IO) == IO) == !!(pc->flags & PC_FLAG_WRITING)) {
476 /* Hopefully, we will never get here */
477 printk(KERN_ERR "ide-floppy: We wanted to %s, ",
478 (ireason & IO) ? "Write" : "Read");
479 printk(KERN_ERR "but the floppy wants us to %s !\n",
480 (ireason & IO) ? "Read" : "Write");
481 return ide_do_reset(drive);
482 }
483 if (!(pc->flags & PC_FLAG_WRITING)) {
484 /* Reading - Check that we have enough space */
485 temp = pc->xferred + bcount;
486 if (temp > pc->req_xfer) {
487 if (temp > pc->buf_size) {
488 printk(KERN_ERR "ide-floppy: The floppy wants "
489 "to send us more data than expected "
490 "- discarding data\n");
491 ide_pad_transfer(drive, 0, bcount);
492
493 ide_set_handler(drive,
494 &idefloppy_pc_intr,
495 IDEFLOPPY_WAIT_CMD,
496 NULL);
497 return ide_started;
498 }
499 debug_log("The floppy wants to send us more data than"
500 " expected - allowing transfer\n");
501 }
502 }
503 if (pc->flags & PC_FLAG_WRITING)
504 xferfunc = hwif->output_data;
505 else
506 xferfunc = hwif->input_data;
507
508 if (pc->buf)
509 xferfunc(drive, NULL, pc->cur_pos, bcount);
510 else
511 ide_floppy_io_buffers(drive, pc, bcount,
512 !!(pc->flags & PC_FLAG_WRITING));
513
514 /* Update the current position */
515 pc->xferred += bcount;
516 pc->cur_pos += bcount;
517
518 /* And set the interrupt handler again */
519 ide_set_handler(drive, &idefloppy_pc_intr, IDEFLOPPY_WAIT_CMD, NULL);
520 return ide_started;
521}
522
523/*
524 * This is the original routine that did the packet transfer.
525 * It fails at high speeds on the Iomega ZIP drive, so there's a slower version
526 * for that drive below. The algorithm is chosen based on drive type
527 */
528static ide_startstop_t idefloppy_transfer_pc(ide_drive_t *drive)
529{
530 ide_hwif_t *hwif = drive->hwif;
531 ide_startstop_t startstop;
532 idefloppy_floppy_t *floppy = drive->driver_data;
533 u8 ireason;
534
535 if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) {
536 printk(KERN_ERR "ide-floppy: Strange, packet command "
537 "initiated yet DRQ isn't asserted\n");
538 return startstop;
539 }
540 ireason = hwif->INB(hwif->io_ports.nsect_addr);
541 if ((ireason & CD) == 0 || (ireason & IO)) {
542 printk(KERN_ERR "ide-floppy: (IO,CoD) != (0,1) while "
543 "issuing a packet command\n");
544 return ide_do_reset(drive);
545 }
546 391
547 /* Set the interrupt routine */ 392 return ide_pc_intr(drive, floppy->pc, idefloppy_pc_intr,
548 ide_set_handler(drive, &idefloppy_pc_intr, IDEFLOPPY_WAIT_CMD, NULL); 393 IDEFLOPPY_WAIT_CMD, NULL, idefloppy_update_buffers,
549 394 idefloppy_retry_pc, NULL, ide_floppy_io_buffers);
550 /* Send the actual packet */
551 hwif->output_data(drive, NULL, floppy->pc->c, 12);
552
553 return ide_started;
554} 395}
555 396
556
557/* 397/*
558 * What we have here is a classic case of a top half / bottom half interrupt 398 * What we have here is a classic case of a top half / bottom half interrupt
559 * service routine. In interrupt mode, the device sends an interrupt to signal 399 * service routine. In interrupt mode, the device sends an interrupt to signal
560 * that it is ready to receive a packet. However, we need to delay about 2-3 400 * that it is ready to receive a packet. However, we need to delay about 2-3
561 * ticks before issuing the packet or we gets in trouble. 401 * ticks before issuing the packet or we gets in trouble.
562 *
563 * So, follow carefully. transfer_pc1 is called as an interrupt (or directly).
564 * In either case, when the device says it's ready for a packet, we schedule
565 * the packet transfer to occur about 2-3 ticks later in transfer_pc2.
566 */ 402 */
567static int idefloppy_transfer_pc2(ide_drive_t *drive) 403static int idefloppy_transfer_pc(ide_drive_t *drive)
568{ 404{
569 idefloppy_floppy_t *floppy = drive->driver_data; 405 idefloppy_floppy_t *floppy = drive->driver_data;
570 406
@@ -575,24 +411,19 @@ static int idefloppy_transfer_pc2(ide_drive_t *drive)
575 return IDEFLOPPY_WAIT_CMD; 411 return IDEFLOPPY_WAIT_CMD;
576} 412}
577 413
578static ide_startstop_t idefloppy_transfer_pc1(ide_drive_t *drive) 414
415/*
416 * Called as an interrupt (or directly). When the device says it's ready for a
417 * packet, we schedule the packet transfer to occur about 2-3 ticks later in
418 * transfer_pc.
419 */
420static ide_startstop_t idefloppy_start_pc_transfer(ide_drive_t *drive)
579{ 421{
580 ide_hwif_t *hwif = drive->hwif;
581 idefloppy_floppy_t *floppy = drive->driver_data; 422 idefloppy_floppy_t *floppy = drive->driver_data;
582 ide_startstop_t startstop; 423 struct ide_atapi_pc *pc = floppy->pc;
583 u8 ireason; 424 ide_expiry_t *expiry;
425 unsigned int timeout;
584 426
585 if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) {
586 printk(KERN_ERR "ide-floppy: Strange, packet command "
587 "initiated yet DRQ isn't asserted\n");
588 return startstop;
589 }
590 ireason = hwif->INB(hwif->io_ports.nsect_addr);
591 if ((ireason & CD) == 0 || (ireason & IO)) {
592 printk(KERN_ERR "ide-floppy: (IO,CoD) != (0,1) "
593 "while issuing a packet command\n");
594 return ide_do_reset(drive);
595 }
596 /* 427 /*
597 * The following delay solves a problem with ATAPI Zip 100 drives 428 * The following delay solves a problem with ATAPI Zip 100 drives
598 * where the Busy flag was apparently being deasserted before the 429 * where the Busy flag was apparently being deasserted before the
@@ -601,10 +432,15 @@ static ide_startstop_t idefloppy_transfer_pc1(ide_drive_t *drive)
601 * 40 and 50msec work well. idefloppy_pc_intr will not be actually 432 * 40 and 50msec work well. idefloppy_pc_intr will not be actually
602 * used until after the packet is moved in about 50 msec. 433 * used until after the packet is moved in about 50 msec.
603 */ 434 */
435 if (pc->flags & PC_FLAG_ZIP_DRIVE) {
436 timeout = floppy->ticks;
437 expiry = &idefloppy_transfer_pc;
438 } else {
439 timeout = IDEFLOPPY_WAIT_CMD;
440 expiry = NULL;
441 }
604 442
605 ide_set_handler(drive, &idefloppy_pc_intr, floppy->ticks, 443 return ide_transfer_pc(drive, pc, idefloppy_pc_intr, timeout, expiry);
606 &idefloppy_transfer_pc2);
607 return ide_started;
608} 444}
609 445
610static void ide_floppy_report_error(idefloppy_floppy_t *floppy, 446static void ide_floppy_report_error(idefloppy_floppy_t *floppy,
@@ -627,10 +463,6 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
627 struct ide_atapi_pc *pc) 463 struct ide_atapi_pc *pc)
628{ 464{
629 idefloppy_floppy_t *floppy = drive->driver_data; 465 idefloppy_floppy_t *floppy = drive->driver_data;
630 ide_hwif_t *hwif = drive->hwif;
631 ide_handler_t *pkt_xfer_routine;
632 u16 bcount;
633 u8 dma;
634 466
635 if (floppy->failed_pc == NULL && 467 if (floppy->failed_pc == NULL &&
636 pc->c[0] != GPCMD_REQUEST_SENSE) 468 pc->c[0] != GPCMD_REQUEST_SENSE)
@@ -645,65 +477,16 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
645 pc->error = IDEFLOPPY_ERROR_GENERAL; 477 pc->error = IDEFLOPPY_ERROR_GENERAL;
646 478
647 floppy->failed_pc = NULL; 479 floppy->failed_pc = NULL;
648 pc->idefloppy_callback(drive); 480 pc->callback(drive);
649 return ide_stopped; 481 return ide_stopped;
650 } 482 }
651 483
652 debug_log("Retry number - %d\n", pc->retries); 484 debug_log("Retry number - %d\n", pc->retries);
653 485
654 pc->retries++; 486 pc->retries++;
655 /* We haven't transferred any data yet */
656 pc->xferred = 0;
657 pc->cur_pos = pc->buf;
658 bcount = min(pc->req_xfer, 63 * 1024);
659
660 if (pc->flags & PC_FLAG_DMA_ERROR) {
661 pc->flags &= ~PC_FLAG_DMA_ERROR;
662 ide_dma_off(drive);
663 }
664 dma = 0;
665
666 if ((pc->flags & PC_FLAG_DMA_RECOMMENDED) && drive->using_dma)
667 dma = !hwif->dma_ops->dma_setup(drive);
668 487
669 ide_pktcmd_tf_load(drive, IDE_TFLAG_NO_SELECT_MASK | 488 return ide_issue_pc(drive, pc, idefloppy_start_pc_transfer,
670 IDE_TFLAG_OUT_DEVICE, bcount, dma); 489 IDEFLOPPY_WAIT_CMD, NULL);
671
672 if (dma) {
673 /* Begin DMA, if necessary */
674 pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
675 hwif->dma_ops->dma_start(drive);
676 }
677
678 /* Can we transfer the packet when we get the interrupt or wait? */
679 if (floppy->flags & IDEFLOPPY_FLAG_ZIP_DRIVE) {
680 /* wait */
681 pkt_xfer_routine = &idefloppy_transfer_pc1;
682 } else {
683 /* immediate */
684 pkt_xfer_routine = &idefloppy_transfer_pc;
685 }
686
687 if (floppy->flags & IDEFLOPPY_FLAG_DRQ_INTERRUPT) {
688 /* Issue the packet command */
689 ide_execute_command(drive, WIN_PACKETCMD,
690 pkt_xfer_routine,
691 IDEFLOPPY_WAIT_CMD,
692 NULL);
693 return ide_started;
694 } else {
695 /* Issue the packet command */
696 ide_execute_pkt_cmd(drive);
697 return (*pkt_xfer_routine) (drive);
698 }
699}
700
701static void idefloppy_rw_callback(ide_drive_t *drive)
702{
703 debug_log("Reached %s\n", __func__);
704
705 idefloppy_end_request(drive, 1, 0);
706 return;
707} 490}
708 491
709static void idefloppy_create_prevent_cmd(struct ide_atapi_pc *pc, int prevent) 492static void idefloppy_create_prevent_cmd(struct ide_atapi_pc *pc, int prevent)
@@ -800,21 +583,19 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy,
800 put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]); 583 put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]);
801 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]); 584 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]);
802 585
803 pc->idefloppy_callback = &idefloppy_rw_callback;
804 pc->rq = rq; 586 pc->rq = rq;
805 pc->b_count = cmd == READ ? 0 : rq->bio->bi_size; 587 pc->b_count = cmd == READ ? 0 : rq->bio->bi_size;
806 if (rq->cmd_flags & REQ_RW) 588 if (rq->cmd_flags & REQ_RW)
807 pc->flags |= PC_FLAG_WRITING; 589 pc->flags |= PC_FLAG_WRITING;
808 pc->buf = NULL; 590 pc->buf = NULL;
809 pc->req_xfer = pc->buf_size = blocks * floppy->block_size; 591 pc->req_xfer = pc->buf_size = blocks * floppy->block_size;
810 pc->flags |= PC_FLAG_DMA_RECOMMENDED; 592 pc->flags |= PC_FLAG_DMA_OK;
811} 593}
812 594
813static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy, 595static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
814 struct ide_atapi_pc *pc, struct request *rq) 596 struct ide_atapi_pc *pc, struct request *rq)
815{ 597{
816 idefloppy_init_pc(pc); 598 idefloppy_init_pc(pc);
817 pc->idefloppy_callback = &idefloppy_rw_callback;
818 memcpy(pc->c, rq->cmd, sizeof(pc->c)); 599 memcpy(pc->c, rq->cmd, sizeof(pc->c));
819 pc->rq = rq; 600 pc->rq = rq;
820 pc->b_count = rq->data_len; 601 pc->b_count = rq->data_len;
@@ -822,7 +603,7 @@ static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy,
822 pc->flags |= PC_FLAG_WRITING; 603 pc->flags |= PC_FLAG_WRITING;
823 pc->buf = rq->data; 604 pc->buf = rq->data;
824 if (rq->bio) 605 if (rq->bio)
825 pc->flags |= PC_FLAG_DMA_RECOMMENDED; 606 pc->flags |= PC_FLAG_DMA_OK;
826 /* 607 /*
827 * possibly problematic, doesn't look like ide-floppy correctly 608 * possibly problematic, doesn't look like ide-floppy correctly
828 * handled scattered requests if dma fails... 609 * handled scattered requests if dma fails...
@@ -875,7 +656,14 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
875 return ide_stopped; 656 return ide_stopped;
876 } 657 }
877 658
659 if (floppy->flags & IDEFLOPPY_FLAG_DRQ_INTERRUPT)
660 pc->flags |= PC_FLAG_DRQ_INTERRUPT;
661
662 if (floppy->flags & IDEFLOPPY_FLAG_ZIP_DRIVE)
663 pc->flags |= PC_FLAG_ZIP_DRIVE;
664
878 pc->rq = rq; 665 pc->rq = rq;
666
879 return idefloppy_issue_pc(drive, pc); 667 return idefloppy_issue_pc(drive, pc);
880} 668}
881 669
@@ -886,14 +674,16 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive,
886static int idefloppy_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc) 674static int idefloppy_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc)
887{ 675{
888 struct ide_floppy_obj *floppy = drive->driver_data; 676 struct ide_floppy_obj *floppy = drive->driver_data;
889 struct request rq; 677 struct request *rq;
678 int error;
890 679
891 ide_init_drive_cmd(&rq); 680 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
892 rq.buffer = (char *) pc; 681 rq->buffer = (char *) pc;
893 rq.cmd_type = REQ_TYPE_SPECIAL; 682 rq->cmd_type = REQ_TYPE_SPECIAL;
894 rq.rq_disk = floppy->disk; 683 error = blk_execute_rq(drive->queue, floppy->disk, rq, 0);
684 blk_put_request(rq);
895 685
896 return ide_do_drive_cmd(drive, &rq, ide_wait); 686 return error;
897} 687}
898 688
899/* 689/*
@@ -1622,11 +1412,6 @@ static int ide_floppy_probe(ide_drive_t *drive)
1622 " of ide-floppy\n", drive->name); 1412 " of ide-floppy\n", drive->name);
1623 goto failed; 1413 goto failed;
1624 } 1414 }
1625 if (drive->scsi) {
1626 printk(KERN_INFO "ide-floppy: passing drive %s to ide-scsi"
1627 " emulation.\n", drive->name);
1628 goto failed;
1629 }
1630 floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL); 1415 floppy = kzalloc(sizeof(idefloppy_floppy_t), GFP_KERNEL);
1631 if (!floppy) { 1416 if (!floppy) {
1632 printk(KERN_ERR "ide-floppy: %s: Can't allocate a floppy" 1417 printk(KERN_ERR "ide-floppy: %s: Can't allocate a floppy"
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 696525342e9a..28057747c1f8 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -358,31 +358,6 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
358 358
359EXPORT_SYMBOL(ide_end_drive_cmd); 359EXPORT_SYMBOL(ide_end_drive_cmd);
360 360
361/**
362 * try_to_flush_leftover_data - flush junk
363 * @drive: drive to flush
364 *
365 * try_to_flush_leftover_data() is invoked in response to a drive
366 * unexpectedly having its DRQ_STAT bit set. As an alternative to
367 * resetting the drive, this routine tries to clear the condition
368 * by read a sector's worth of data from the drive. Of course,
369 * this may not help if the drive is *waiting* for data from *us*.
370 */
371static void try_to_flush_leftover_data (ide_drive_t *drive)
372{
373 int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS;
374
375 if (drive->media != ide_disk)
376 return;
377 while (i > 0) {
378 u32 buffer[16];
379 u32 wcount = (i > 16) ? 16 : i;
380
381 i -= wcount;
382 drive->hwif->input_data(drive, NULL, buffer, wcount * 4);
383 }
384}
385
386static void ide_kill_rq(ide_drive_t *drive, struct request *rq) 361static void ide_kill_rq(ide_drive_t *drive, struct request *rq)
387{ 362{
388 if (rq->rq_disk) { 363 if (rq->rq_disk) {
@@ -422,8 +397,11 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, u8
422 } 397 }
423 398
424 if ((stat & DRQ_STAT) && rq_data_dir(rq) == READ && 399 if ((stat & DRQ_STAT) && rq_data_dir(rq) == READ &&
425 (hwif->host_flags & IDE_HFLAG_ERROR_STOPS_FIFO) == 0) 400 (hwif->host_flags & IDE_HFLAG_ERROR_STOPS_FIFO) == 0) {
426 try_to_flush_leftover_data(drive); 401 int nsect = drive->mult_count ? drive->mult_count : 1;
402
403 ide_pad_transfer(drive, READ, nsect * SECTOR_SIZE);
404 }
427 405
428 if (rq->errors >= ERROR_MAX || blk_noretry_request(rq)) { 406 if (rq->errors >= ERROR_MAX || blk_noretry_request(rq)) {
429 ide_kill_rq(drive, rq); 407 ide_kill_rq(drive, rq);
@@ -459,7 +437,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, u
459 437
460 if (ide_read_status(drive) & (BUSY_STAT | DRQ_STAT)) 438 if (ide_read_status(drive) & (BUSY_STAT | DRQ_STAT))
461 /* force an abort */ 439 /* force an abort */
462 hwif->OUTBSYNC(drive, WIN_IDLEIMMEDIATE, 440 hwif->OUTBSYNC(hwif, WIN_IDLEIMMEDIATE,
463 hwif->io_ports.command_addr); 441 hwif->io_ports.command_addr);
464 442
465 if (rq->errors >= ERROR_MAX) { 443 if (rq->errors >= ERROR_MAX) {
@@ -1539,88 +1517,30 @@ irqreturn_t ide_intr (int irq, void *dev_id)
1539} 1517}
1540 1518
1541/** 1519/**
1542 * ide_init_drive_cmd - initialize a drive command request
1543 * @rq: request object
1544 *
1545 * Initialize a request before we fill it in and send it down to
1546 * ide_do_drive_cmd. Commands must be set up by this function. Right
1547 * now it doesn't do a lot, but if that changes abusers will have a
1548 * nasty surprise.
1549 */
1550
1551void ide_init_drive_cmd (struct request *rq)
1552{
1553 blk_rq_init(NULL, rq);
1554}
1555
1556EXPORT_SYMBOL(ide_init_drive_cmd);
1557
1558/**
1559 * ide_do_drive_cmd - issue IDE special command 1520 * ide_do_drive_cmd - issue IDE special command
1560 * @drive: device to issue command 1521 * @drive: device to issue command
1561 * @rq: request to issue 1522 * @rq: request to issue
1562 * @action: action for processing
1563 * 1523 *
1564 * This function issues a special IDE device request 1524 * This function issues a special IDE device request
1565 * onto the request queue. 1525 * onto the request queue.
1566 * 1526 *
1567 * If action is ide_wait, then the rq is queued at the end of the 1527 * the rq is queued at the head of the request queue, displacing
1568 * request queue, and the function sleeps until it has been processed. 1528 * the currently-being-processed request and this function
1569 * This is for use when invoked from an ioctl handler. 1529 * returns immediately without waiting for the new rq to be
1570 * 1530 * completed. This is VERY DANGEROUS, and is intended for
1571 * If action is ide_preempt, then the rq is queued at the head of 1531 * careful use by the ATAPI tape/cdrom driver code.
1572 * the request queue, displacing the currently-being-processed
1573 * request and this function returns immediately without waiting
1574 * for the new rq to be completed. This is VERY DANGEROUS, and is
1575 * intended for careful use by the ATAPI tape/cdrom driver code.
1576 *
1577 * If action is ide_end, then the rq is queued at the end of the
1578 * request queue, and the function returns immediately without waiting
1579 * for the new rq to be completed. This is again intended for careful
1580 * use by the ATAPI tape/cdrom driver code.
1581 */ 1532 */
1582 1533
1583int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action) 1534void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq)
1584{ 1535{
1585 unsigned long flags; 1536 unsigned long flags;
1586 ide_hwgroup_t *hwgroup = HWGROUP(drive); 1537 ide_hwgroup_t *hwgroup = HWGROUP(drive);
1587 DECLARE_COMPLETION_ONSTACK(wait);
1588 int where = ELEVATOR_INSERT_BACK, err;
1589 int must_wait = (action == ide_wait || action == ide_head_wait);
1590
1591 rq->errors = 0;
1592
1593 /*
1594 * we need to hold an extra reference to request for safe inspection
1595 * after completion
1596 */
1597 if (must_wait) {
1598 rq->ref_count++;
1599 rq->end_io_data = &wait;
1600 rq->end_io = blk_end_sync_rq;
1601 }
1602 1538
1603 spin_lock_irqsave(&ide_lock, flags); 1539 spin_lock_irqsave(&ide_lock, flags);
1604 if (action == ide_preempt) 1540 hwgroup->rq = NULL;
1605 hwgroup->rq = NULL; 1541 __elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 1);
1606 if (action == ide_preempt || action == ide_head_wait) { 1542 __generic_unplug_device(drive->queue);
1607 where = ELEVATOR_INSERT_FRONT;
1608 rq->cmd_flags |= REQ_PREEMPT;
1609 }
1610 __elv_add_request(drive->queue, rq, where, 0);
1611 ide_do_request(hwgroup, IDE_NO_IRQ);
1612 spin_unlock_irqrestore(&ide_lock, flags); 1543 spin_unlock_irqrestore(&ide_lock, flags);
1613
1614 err = 0;
1615 if (must_wait) {
1616 wait_for_completion(&wait);
1617 if (rq->errors)
1618 err = -EIO;
1619
1620 blk_put_request(rq);
1621 }
1622
1623 return err;
1624} 1544}
1625 1545
1626EXPORT_SYMBOL(ide_do_drive_cmd); 1546EXPORT_SYMBOL(ide_do_drive_cmd);
@@ -1637,6 +1557,8 @@ void ide_pktcmd_tf_load(ide_drive_t *drive, u32 tf_flags, u16 bcount, u8 dma)
1637 task.tf.lbah = (bcount >> 8) & 0xff; 1557 task.tf.lbah = (bcount >> 8) & 0xff;
1638 1558
1639 ide_tf_dump(drive->name, &task.tf); 1559 ide_tf_dump(drive->name, &task.tf);
1560 ide_set_irq(drive, 1);
1561 SELECT_MASK(drive, 0);
1640 drive->hwif->tf_load(drive, &task); 1562 drive->hwif->tf_load(drive, &task);
1641} 1563}
1642 1564
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 0daf923541ff..80ad4f234f3f 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -42,7 +42,7 @@ static void ide_outb (u8 val, unsigned long port)
42 outb(val, port); 42 outb(val, port);
43} 43}
44 44
45static void ide_outbsync (ide_drive_t *drive, u8 addr, unsigned long port) 45static void ide_outbsync(ide_hwif_t *hwif, u8 addr, unsigned long port)
46{ 46{
47 outb(addr, port); 47 outb(addr, port);
48} 48}
@@ -68,7 +68,7 @@ static void ide_mm_outb (u8 value, unsigned long port)
68 writeb(value, (void __iomem *) port); 68 writeb(value, (void __iomem *) port);
69} 69}
70 70
71static void ide_mm_outbsync (ide_drive_t *drive, u8 value, unsigned long port) 71static void ide_mm_outbsync(ide_hwif_t *hwif, u8 value, unsigned long port)
72{ 72{
73 writeb(value, (void __iomem *) port); 73 writeb(value, (void __iomem *) port);
74} 74}
@@ -95,7 +95,7 @@ void SELECT_DRIVE (ide_drive_t *drive)
95 hwif->OUTB(drive->select.all, hwif->io_ports.device_addr); 95 hwif->OUTB(drive->select.all, hwif->io_ports.device_addr);
96} 96}
97 97
98static void SELECT_MASK(ide_drive_t *drive, int mask) 98void SELECT_MASK(ide_drive_t *drive, int mask)
99{ 99{
100 const struct ide_port_ops *port_ops = drive->hwif->port_ops; 100 const struct ide_port_ops *port_ops = drive->hwif->port_ops;
101 101
@@ -120,11 +120,6 @@ static void ide_tf_load(ide_drive_t *drive, ide_task_t *task)
120 if (task->tf_flags & IDE_TFLAG_FLAGGED) 120 if (task->tf_flags & IDE_TFLAG_FLAGGED)
121 HIHI = 0xFF; 121 HIHI = 0xFF;
122 122
123 ide_set_irq(drive, 1);
124
125 if ((task->tf_flags & IDE_TFLAG_NO_SELECT_MASK) == 0)
126 SELECT_MASK(drive, 0);
127
128 if (task->tf_flags & IDE_TFLAG_OUT_DATA) { 123 if (task->tf_flags & IDE_TFLAG_OUT_DATA) {
129 u16 data = (tf->hob_data << 8) | tf->data; 124 u16 data = (tf->hob_data << 8) | tf->data;
130 125
@@ -191,7 +186,7 @@ static void ide_tf_read(ide_drive_t *drive, ide_task_t *task)
191 } 186 }
192 187
193 /* be sure we're looking at the low order bits */ 188 /* be sure we're looking at the low order bits */
194 tf_outb(drive->ctl & ~0x80, io_ports->ctl_addr); 189 tf_outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr);
195 190
196 if (task->tf_flags & IDE_TFLAG_IN_NSECT) 191 if (task->tf_flags & IDE_TFLAG_IN_NSECT)
197 tf->nsect = tf_inb(io_ports->nsect_addr); 192 tf->nsect = tf_inb(io_ports->nsect_addr);
@@ -205,7 +200,7 @@ static void ide_tf_read(ide_drive_t *drive, ide_task_t *task)
205 tf->device = tf_inb(io_ports->device_addr); 200 tf->device = tf_inb(io_ports->device_addr);
206 201
207 if (task->tf_flags & IDE_TFLAG_LBA48) { 202 if (task->tf_flags & IDE_TFLAG_LBA48) {
208 tf_outb(drive->ctl | 0x80, io_ports->ctl_addr); 203 tf_outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr);
209 204
210 if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) 205 if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE)
211 tf->hob_feature = tf_inb(io_ports->feature_addr); 206 tf->hob_feature = tf_inb(io_ports->feature_addr);
@@ -689,9 +684,9 @@ int ide_driveid_update(ide_drive_t *drive)
689 */ 684 */
690 685
691 SELECT_MASK(drive, 1); 686 SELECT_MASK(drive, 1);
692 ide_set_irq(drive, 1); 687 ide_set_irq(drive, 0);
693 msleep(50); 688 msleep(50);
694 hwif->OUTBSYNC(drive, WIN_IDENTIFY, hwif->io_ports.command_addr); 689 hwif->OUTBSYNC(hwif, WIN_IDENTIFY, hwif->io_ports.command_addr);
695 timeout = jiffies + WAIT_WORSTCASE; 690 timeout = jiffies + WAIT_WORSTCASE;
696 do { 691 do {
697 if (time_after(jiffies, timeout)) { 692 if (time_after(jiffies, timeout)) {
@@ -744,9 +739,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
744 int error = 0; 739 int error = 0;
745 u8 stat; 740 u8 stat;
746 741
747// while (HWGROUP(drive)->busy)
748// msleep(50);
749
750#ifdef CONFIG_BLK_DEV_IDEDMA 742#ifdef CONFIG_BLK_DEV_IDEDMA
751 if (hwif->dma_ops) /* check if host supports DMA */ 743 if (hwif->dma_ops) /* check if host supports DMA */
752 hwif->dma_ops->dma_host_set(drive, 0); 744 hwif->dma_ops->dma_host_set(drive, 0);
@@ -781,7 +773,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
781 ide_set_irq(drive, 0); 773 ide_set_irq(drive, 0);
782 hwif->OUTB(speed, io_ports->nsect_addr); 774 hwif->OUTB(speed, io_ports->nsect_addr);
783 hwif->OUTB(SETFEATURES_XFER, io_ports->feature_addr); 775 hwif->OUTB(SETFEATURES_XFER, io_ports->feature_addr);
784 hwif->OUTBSYNC(drive, WIN_SETFEATURES, io_ports->command_addr); 776 hwif->OUTBSYNC(hwif, WIN_SETFEATURES, io_ports->command_addr);
785 if (drive->quirk_list == 2) 777 if (drive->quirk_list == 2)
786 ide_set_irq(drive, 1); 778 ide_set_irq(drive, 1);
787 779
@@ -889,7 +881,7 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
889 881
890 spin_lock_irqsave(&ide_lock, flags); 882 spin_lock_irqsave(&ide_lock, flags);
891 __ide_set_handler(drive, handler, timeout, expiry); 883 __ide_set_handler(drive, handler, timeout, expiry);
892 hwif->OUTBSYNC(drive, cmd, hwif->io_ports.command_addr); 884 hwif->OUTBSYNC(hwif, cmd, hwif->io_ports.command_addr);
893 /* 885 /*
894 * Drive takes 400nS to respond, we must avoid the IRQ being 886 * Drive takes 400nS to respond, we must avoid the IRQ being
895 * serviced before that. 887 * serviced before that.
@@ -907,7 +899,7 @@ void ide_execute_pkt_cmd(ide_drive_t *drive)
907 unsigned long flags; 899 unsigned long flags;
908 900
909 spin_lock_irqsave(&ide_lock, flags); 901 spin_lock_irqsave(&ide_lock, flags);
910 hwif->OUTBSYNC(drive, WIN_PACKETCMD, hwif->io_ports.command_addr); 902 hwif->OUTBSYNC(hwif, WIN_PACKETCMD, hwif->io_ports.command_addr);
911 ndelay(400); 903 ndelay(400);
912 spin_unlock_irqrestore(&ide_lock, flags); 904 spin_unlock_irqrestore(&ide_lock, flags);
913} 905}
@@ -1102,7 +1094,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1102 pre_reset(drive); 1094 pre_reset(drive);
1103 SELECT_DRIVE(drive); 1095 SELECT_DRIVE(drive);
1104 udelay (20); 1096 udelay (20);
1105 hwif->OUTBSYNC(drive, WIN_SRST, io_ports->command_addr); 1097 hwif->OUTBSYNC(hwif, WIN_SRST, io_ports->command_addr);
1106 ndelay(400); 1098 ndelay(400);
1107 hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; 1099 hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
1108 hwgroup->polling = 1; 1100 hwgroup->polling = 1;
@@ -1133,14 +1125,14 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
1133 * recover from reset very quickly, saving us the first 50ms wait time. 1125 * recover from reset very quickly, saving us the first 50ms wait time.
1134 */ 1126 */
1135 /* set SRST and nIEN */ 1127 /* set SRST and nIEN */
1136 hwif->OUTBSYNC(drive, drive->ctl|6, io_ports->ctl_addr); 1128 hwif->OUTBSYNC(hwif, ATA_DEVCTL_OBS | 6, io_ports->ctl_addr);
1137 /* more than enough time */ 1129 /* more than enough time */
1138 udelay(10); 1130 udelay(10);
1139 if (drive->quirk_list == 2) 1131 if (drive->quirk_list == 2)
1140 ctl = drive->ctl; /* clear SRST and nIEN */ 1132 ctl = ATA_DEVCTL_OBS; /* clear SRST and nIEN */
1141 else 1133 else
1142 ctl = drive->ctl | 2; /* clear SRST, leave nIEN */ 1134 ctl = ATA_DEVCTL_OBS | 2; /* clear SRST, leave nIEN */
1143 hwif->OUTBSYNC(drive, ctl, io_ports->ctl_addr); 1135 hwif->OUTBSYNC(hwif, ctl, io_ports->ctl_addr);
1144 /* more than enough time */ 1136 /* more than enough time */
1145 udelay(10); 1137 udelay(10);
1146 hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; 1138 hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 26e68b65b7cf..d21e51a02c3e 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -293,7 +293,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
293 hwif->OUTB(0, io_ports->feature_addr); 293 hwif->OUTB(0, io_ports->feature_addr);
294 294
295 /* ask drive for ID */ 295 /* ask drive for ID */
296 hwif->OUTBSYNC(drive, cmd, io_ports->command_addr); 296 hwif->OUTBSYNC(hwif, cmd, hwif->io_ports.command_addr);
297 297
298 timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; 298 timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
299 timeout += jiffies; 299 timeout += jiffies;
@@ -478,9 +478,9 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
478 printk(KERN_ERR "%s: no response (status = 0x%02x), " 478 printk(KERN_ERR "%s: no response (status = 0x%02x), "
479 "resetting drive\n", drive->name, stat); 479 "resetting drive\n", drive->name, stat);
480 msleep(50); 480 msleep(50);
481 hwif->OUTB(drive->select.all, io_ports->device_addr); 481 SELECT_DRIVE(drive);
482 msleep(50); 482 msleep(50);
483 hwif->OUTBSYNC(drive, WIN_SRST, io_ports->command_addr); 483 hwif->OUTBSYNC(hwif, WIN_SRST, io_ports->command_addr);
484 (void)ide_busy_sleep(hwif); 484 (void)ide_busy_sleep(hwif);
485 rc = try_to_identify(drive, cmd); 485 rc = try_to_identify(drive, cmd);
486 } 486 }
@@ -516,7 +516,7 @@ static void enable_nest (ide_drive_t *drive)
516 printk("%s: enabling %s -- ", hwif->name, drive->id->model); 516 printk("%s: enabling %s -- ", hwif->name, drive->id->model);
517 SELECT_DRIVE(drive); 517 SELECT_DRIVE(drive);
518 msleep(50); 518 msleep(50);
519 hwif->OUTBSYNC(drive, EXABYTE_ENABLE_NEST, hwif->io_ports.command_addr); 519 hwif->OUTBSYNC(hwif, EXABYTE_ENABLE_NEST, hwif->io_ports.command_addr);
520 520
521 if (ide_busy_sleep(hwif)) { 521 if (ide_busy_sleep(hwif)) {
522 printk(KERN_CONT "failed (timeout)\n"); 522 printk(KERN_CONT "failed (timeout)\n");
@@ -1065,7 +1065,7 @@ static int init_irq (ide_hwif_t *hwif)
1065 1065
1066 if (io_ports->ctl_addr) 1066 if (io_ports->ctl_addr)
1067 /* clear nIEN */ 1067 /* clear nIEN */
1068 hwif->OUTB(0x08, io_ports->ctl_addr); 1068 hwif->OUTBSYNC(hwif, ATA_DEVCTL_OBS, io_ports->ctl_addr);
1069 1069
1070 if (request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup)) 1070 if (request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup))
1071 goto out_unlink; 1071 goto out_unlink;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a3d228302d20..f9cf1670e4e1 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -144,9 +144,6 @@ enum {
144 144
145/*************************** End of tunable parameters ***********************/ 145/*************************** End of tunable parameters ***********************/
146 146
147/* Read/Write error simulation */
148#define SIMULATE_ERRORS 0
149
150/* tape directions */ 147/* tape directions */
151enum { 148enum {
152 IDETAPE_DIR_NONE = (1 << 0), 149 IDETAPE_DIR_NONE = (1 << 0),
@@ -442,7 +439,7 @@ static void idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
442 } 439 }
443} 440}
444 441
445static void idetape_update_buffers(struct ide_atapi_pc *pc) 442static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
446{ 443{
447 struct idetape_bh *bh = pc->bh; 444 struct idetape_bh *bh = pc->bh;
448 int count; 445 int count;
@@ -506,18 +503,6 @@ static struct request *idetape_next_rq_storage(ide_drive_t *drive)
506 return (&tape->rq_stack[tape->rq_stack_index++]); 503 return (&tape->rq_stack[tape->rq_stack_index++]);
507} 504}
508 505
509static void idetape_init_pc(struct ide_atapi_pc *pc)
510{
511 memset(pc->c, 0, 12);
512 pc->retries = 0;
513 pc->flags = 0;
514 pc->req_xfer = 0;
515 pc->buf = pc->pc_buf;
516 pc->buf_size = IDETAPE_PC_BUFFER_SIZE;
517 pc->bh = NULL;
518 pc->b_data = NULL;
519}
520
521/* 506/*
522 * called on each failed packet command retry to analyze the request sense. We 507 * called on each failed packet command retry to analyze the request sense. We
523 * currently do not utilize this information. 508 * currently do not utilize this information.
@@ -538,8 +523,8 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
538 if (pc->flags & PC_FLAG_DMA_ERROR) { 523 if (pc->flags & PC_FLAG_DMA_ERROR) {
539 pc->xferred = pc->req_xfer - 524 pc->xferred = pc->req_xfer -
540 tape->blk_size * 525 tape->blk_size *
541 be32_to_cpu(get_unaligned((u32 *)&sense[3])); 526 get_unaligned_be32(&sense[3]);
542 idetape_update_buffers(pc); 527 idetape_update_buffers(drive, pc);
543 } 528 }
544 529
545 /* 530 /*
@@ -634,21 +619,78 @@ static int idetape_end_request(ide_drive_t *drive, int uptodate, int nr_sects)
634 return 0; 619 return 0;
635} 620}
636 621
637static ide_startstop_t idetape_request_sense_callback(ide_drive_t *drive) 622static void ide_tape_callback(ide_drive_t *drive)
638{ 623{
639 idetape_tape_t *tape = drive->driver_data; 624 idetape_tape_t *tape = drive->driver_data;
625 struct ide_atapi_pc *pc = tape->pc;
626 int uptodate = pc->error ? 0 : 1;
640 627
641 debug_log(DBG_PROCS, "Enter %s\n", __func__); 628 debug_log(DBG_PROCS, "Enter %s\n", __func__);
642 629
643 if (!tape->pc->error) { 630 if (tape->failed_pc == pc)
644 idetape_analyze_error(drive, tape->pc->buf); 631 tape->failed_pc = NULL;
645 idetape_end_request(drive, 1, 0); 632
646 } else { 633 if (pc->c[0] == REQUEST_SENSE) {
647 printk(KERN_ERR "ide-tape: Error in REQUEST SENSE itself - " 634 if (uptodate)
648 "Aborting request!\n"); 635 idetape_analyze_error(drive, pc->buf);
649 idetape_end_request(drive, 0, 0); 636 else
637 printk(KERN_ERR "ide-tape: Error in REQUEST SENSE "
638 "itself - Aborting request!\n");
639 } else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
640 struct request *rq = drive->hwif->hwgroup->rq;
641 int blocks = pc->xferred / tape->blk_size;
642
643 tape->avg_size += blocks * tape->blk_size;
644
645 if (time_after_eq(jiffies, tape->avg_time + HZ)) {
646 tape->avg_speed = tape->avg_size * HZ /
647 (jiffies - tape->avg_time) / 1024;
648 tape->avg_size = 0;
649 tape->avg_time = jiffies;
650 }
651
652 tape->first_frame += blocks;
653 rq->current_nr_sectors -= blocks;
654
655 if (pc->error)
656 uptodate = pc->error;
657 } else if (pc->c[0] == READ_POSITION && uptodate) {
658 u8 *readpos = tape->pc->buf;
659
660 debug_log(DBG_SENSE, "BOP - %s\n",
661 (readpos[0] & 0x80) ? "Yes" : "No");
662 debug_log(DBG_SENSE, "EOP - %s\n",
663 (readpos[0] & 0x40) ? "Yes" : "No");
664
665 if (readpos[0] & 0x4) {
666 printk(KERN_INFO "ide-tape: Block location is unknown"
667 "to the tape\n");
668 clear_bit(IDETAPE_FLAG_ADDRESS_VALID, &tape->flags);
669 uptodate = 0;
670 } else {
671 debug_log(DBG_SENSE, "Block Location - %u\n",
672 be32_to_cpu(*(u32 *)&readpos[4]));
673
674 tape->partition = readpos[1];
675 tape->first_frame = be32_to_cpu(*(u32 *)&readpos[4]);
676 set_bit(IDETAPE_FLAG_ADDRESS_VALID, &tape->flags);
677 }
650 } 678 }
651 return ide_stopped; 679
680 idetape_end_request(drive, uptodate, 0);
681}
682
683static void idetape_init_pc(struct ide_atapi_pc *pc)
684{
685 memset(pc->c, 0, 12);
686 pc->retries = 0;
687 pc->flags = 0;
688 pc->req_xfer = 0;
689 pc->buf = pc->pc_buf;
690 pc->buf_size = IDETAPE_PC_BUFFER_SIZE;
691 pc->bh = NULL;
692 pc->b_data = NULL;
693 pc->callback = ide_tape_callback;
652} 694}
653 695
654static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc) 696static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc)
@@ -657,7 +699,6 @@ static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc)
657 pc->c[0] = REQUEST_SENSE; 699 pc->c[0] = REQUEST_SENSE;
658 pc->c[4] = 20; 700 pc->c[4] = 20;
659 pc->req_xfer = 20; 701 pc->req_xfer = 20;
660 pc->idetape_callback = &idetape_request_sense_callback;
661} 702}
662 703
663static void idetape_init_rq(struct request *rq, u8 cmd) 704static void idetape_init_rq(struct request *rq, u8 cmd)
@@ -688,9 +729,10 @@ static void idetape_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc,
688 struct ide_tape_obj *tape = drive->driver_data; 729 struct ide_tape_obj *tape = drive->driver_data;
689 730
690 idetape_init_rq(rq, REQ_IDETAPE_PC1); 731 idetape_init_rq(rq, REQ_IDETAPE_PC1);
732 rq->cmd_flags |= REQ_PREEMPT;
691 rq->buffer = (char *) pc; 733 rq->buffer = (char *) pc;
692 rq->rq_disk = tape->disk; 734 rq->rq_disk = tape->disk;
693 (void) ide_do_drive_cmd(drive, rq, ide_preempt); 735 ide_do_drive_cmd(drive, rq);
694} 736}
695 737
696/* 738/*
@@ -698,7 +740,7 @@ static void idetape_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc,
698 * last packet command. We queue a request sense packet command in 740 * last packet command. We queue a request sense packet command in
699 * the head of the request list. 741 * the head of the request list.
700 */ 742 */
701static ide_startstop_t idetape_retry_pc (ide_drive_t *drive) 743static void idetape_retry_pc(ide_drive_t *drive)
702{ 744{
703 idetape_tape_t *tape = drive->driver_data; 745 idetape_tape_t *tape = drive->driver_data;
704 struct ide_atapi_pc *pc; 746 struct ide_atapi_pc *pc;
@@ -710,7 +752,6 @@ static ide_startstop_t idetape_retry_pc (ide_drive_t *drive)
710 idetape_create_request_sense_cmd(pc); 752 idetape_create_request_sense_cmd(pc);
711 set_bit(IDETAPE_FLAG_IGNORE_DSC, &tape->flags); 753 set_bit(IDETAPE_FLAG_IGNORE_DSC, &tape->flags);
712 idetape_queue_pc_head(drive, pc, rq); 754 idetape_queue_pc_head(drive, pc, rq);
713 return ide_stopped;
714} 755}
715 756
716/* 757/*
@@ -727,7 +768,26 @@ static void idetape_postpone_request(ide_drive_t *drive)
727 ide_stall_queue(drive, tape->dsc_poll_freq); 768 ide_stall_queue(drive, tape->dsc_poll_freq);
728} 769}
729 770
730typedef void idetape_io_buf(ide_drive_t *, struct ide_atapi_pc *, unsigned int); 771static void ide_tape_handle_dsc(ide_drive_t *drive)
772{
773 idetape_tape_t *tape = drive->driver_data;
774
775 /* Media access command */
776 tape->dsc_polling_start = jiffies;
777 tape->dsc_poll_freq = IDETAPE_DSC_MA_FAST;
778 tape->dsc_timeout = jiffies + IDETAPE_DSC_MA_TIMEOUT;
779 /* Allow ide.c to handle other requests */
780 idetape_postpone_request(drive);
781}
782
783static void ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
784 unsigned int bcount, int write)
785{
786 if (write)
787 idetape_output_buffers(drive, pc, bcount);
788 else
789 idetape_input_buffers(drive, pc, bcount);
790}
731 791
732/* 792/*
733 * This is the usual interrupt handler which will be called during a packet 793 * This is the usual interrupt handler which will be called during a packet
@@ -738,169 +798,11 @@ typedef void idetape_io_buf(ide_drive_t *, struct ide_atapi_pc *, unsigned int);
738 */ 798 */
739static ide_startstop_t idetape_pc_intr(ide_drive_t *drive) 799static ide_startstop_t idetape_pc_intr(ide_drive_t *drive)
740{ 800{
741 ide_hwif_t *hwif = drive->hwif;
742 idetape_tape_t *tape = drive->driver_data; 801 idetape_tape_t *tape = drive->driver_data;
743 struct ide_atapi_pc *pc = tape->pc;
744 xfer_func_t *xferfunc;
745 idetape_io_buf *iobuf;
746 unsigned int temp;
747#if SIMULATE_ERRORS
748 static int error_sim_count;
749#endif
750 u16 bcount;
751 u8 stat, ireason;
752
753 debug_log(DBG_PROCS, "Enter %s - interrupt handler\n", __func__);
754
755 /* Clear the interrupt */
756 stat = ide_read_status(drive);
757
758 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
759 if (hwif->dma_ops->dma_end(drive) || (stat & ERR_STAT)) {
760 /*
761 * A DMA error is sometimes expected. For example,
762 * if the tape is crossing a filemark during a
763 * READ command, it will issue an irq and position
764 * itself before the filemark, so that only a partial
765 * data transfer will occur (which causes the DMA
766 * error). In that case, we will later ask the tape
767 * how much bytes of the original request were
768 * actually transferred (we can't receive that
769 * information from the DMA engine on most chipsets).
770 */
771
772 /*
773 * On the contrary, a DMA error is never expected;
774 * it usually indicates a hardware error or abort.
775 * If the tape crosses a filemark during a READ
776 * command, it will issue an irq and position itself
777 * after the filemark (not before). Only a partial
778 * data transfer will occur, but no DMA error.
779 * (AS, 19 Apr 2001)
780 */
781 pc->flags |= PC_FLAG_DMA_ERROR;
782 } else {
783 pc->xferred = pc->req_xfer;
784 idetape_update_buffers(pc);
785 }
786 debug_log(DBG_PROCS, "DMA finished\n");
787
788 }
789
790 /* No more interrupts */
791 if ((stat & DRQ_STAT) == 0) {
792 debug_log(DBG_SENSE, "Packet command completed, %d bytes"
793 " transferred\n", pc->xferred);
794
795 pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
796 local_irq_enable();
797
798#if SIMULATE_ERRORS
799 if ((pc->c[0] == WRITE_6 || pc->c[0] == READ_6) &&
800 (++error_sim_count % 100) == 0) {
801 printk(KERN_INFO "ide-tape: %s: simulating error\n",
802 tape->name);
803 stat |= ERR_STAT;
804 }
805#endif
806 if ((stat & ERR_STAT) && pc->c[0] == REQUEST_SENSE)
807 stat &= ~ERR_STAT;
808 if ((stat & ERR_STAT) || (pc->flags & PC_FLAG_DMA_ERROR)) {
809 /* Error detected */
810 debug_log(DBG_ERR, "%s: I/O error\n", tape->name);
811
812 if (pc->c[0] == REQUEST_SENSE) {
813 printk(KERN_ERR "ide-tape: I/O error in request"
814 " sense command\n");
815 return ide_do_reset(drive);
816 }
817 debug_log(DBG_ERR, "[cmd %x]: check condition\n",
818 pc->c[0]);
819
820 /* Retry operation */
821 return idetape_retry_pc(drive);
822 }
823 pc->error = 0;
824 if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) &&
825 (stat & SEEK_STAT) == 0) {
826 /* Media access command */
827 tape->dsc_polling_start = jiffies;
828 tape->dsc_poll_freq = IDETAPE_DSC_MA_FAST;
829 tape->dsc_timeout = jiffies + IDETAPE_DSC_MA_TIMEOUT;
830 /* Allow ide.c to handle other requests */
831 idetape_postpone_request(drive);
832 return ide_stopped;
833 }
834 if (tape->failed_pc == pc)
835 tape->failed_pc = NULL;
836 /* Command finished - Call the callback function */
837 return pc->idetape_callback(drive);
838 }
839
840 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
841 pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
842 printk(KERN_ERR "ide-tape: The tape wants to issue more "
843 "interrupts in DMA mode\n");
844 printk(KERN_ERR "ide-tape: DMA disabled, reverting to PIO\n");
845 ide_dma_off(drive);
846 return ide_do_reset(drive);
847 }
848 /* Get the number of bytes to transfer on this interrupt. */
849 bcount = (hwif->INB(hwif->io_ports.lbah_addr) << 8) |
850 hwif->INB(hwif->io_ports.lbam_addr);
851
852 ireason = hwif->INB(hwif->io_ports.nsect_addr);
853
854 if (ireason & CD) {
855 printk(KERN_ERR "ide-tape: CoD != 0 in %s\n", __func__);
856 return ide_do_reset(drive);
857 }
858 if (((ireason & IO) == IO) == !!(pc->flags & PC_FLAG_WRITING)) {
859 /* Hopefully, we will never get here */
860 printk(KERN_ERR "ide-tape: We wanted to %s, ",
861 (ireason & IO) ? "Write" : "Read");
862 printk(KERN_ERR "ide-tape: but the tape wants us to %s !\n",
863 (ireason & IO) ? "Read" : "Write");
864 return ide_do_reset(drive);
865 }
866 if (!(pc->flags & PC_FLAG_WRITING)) {
867 /* Reading - Check that we have enough space */
868 temp = pc->xferred + bcount;
869 if (temp > pc->req_xfer) {
870 if (temp > pc->buf_size) {
871 printk(KERN_ERR "ide-tape: The tape wants to "
872 "send us more data than expected "
873 "- discarding data\n");
874 ide_pad_transfer(drive, 0, bcount);
875 ide_set_handler(drive, &idetape_pc_intr,
876 IDETAPE_WAIT_CMD, NULL);
877 return ide_started;
878 }
879 debug_log(DBG_SENSE, "The tape wants to send us more "
880 "data than expected - allowing transfer\n");
881 }
882 iobuf = &idetape_input_buffers;
883 xferfunc = hwif->input_data;
884 } else {
885 iobuf = &idetape_output_buffers;
886 xferfunc = hwif->output_data;
887 }
888
889 if (pc->bh)
890 iobuf(drive, pc, bcount);
891 else
892 xferfunc(drive, NULL, pc->cur_pos, bcount);
893
894 /* Update the current position */
895 pc->xferred += bcount;
896 pc->cur_pos += bcount;
897
898 debug_log(DBG_SENSE, "[cmd %x] transferred %d bytes on that intr.\n",
899 pc->c[0], bcount);
900 802
901 /* And set the interrupt handler again */ 803 return ide_pc_intr(drive, tape->pc, idetape_pc_intr, IDETAPE_WAIT_CMD,
902 ide_set_handler(drive, &idetape_pc_intr, IDETAPE_WAIT_CMD, NULL); 804 NULL, idetape_update_buffers, idetape_retry_pc,
903 return ide_started; 805 ide_tape_handle_dsc, ide_tape_io_buffers);
904} 806}
905 807
906/* 808/*
@@ -941,56 +843,16 @@ static ide_startstop_t idetape_pc_intr(ide_drive_t *drive)
941 */ 843 */
942static ide_startstop_t idetape_transfer_pc(ide_drive_t *drive) 844static ide_startstop_t idetape_transfer_pc(ide_drive_t *drive)
943{ 845{
944 ide_hwif_t *hwif = drive->hwif;
945 idetape_tape_t *tape = drive->driver_data; 846 idetape_tape_t *tape = drive->driver_data;
946 struct ide_atapi_pc *pc = tape->pc;
947 int retries = 100;
948 ide_startstop_t startstop;
949 u8 ireason;
950
951 if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) {
952 printk(KERN_ERR "ide-tape: Strange, packet command initiated "
953 "yet DRQ isn't asserted\n");
954 return startstop;
955 }
956 ireason = hwif->INB(hwif->io_ports.nsect_addr);
957 while (retries-- && ((ireason & CD) == 0 || (ireason & IO))) {
958 printk(KERN_ERR "ide-tape: (IO,CoD != (0,1) while issuing "
959 "a packet command, retrying\n");
960 udelay(100);
961 ireason = hwif->INB(hwif->io_ports.nsect_addr);
962 if (retries == 0) {
963 printk(KERN_ERR "ide-tape: (IO,CoD != (0,1) while "
964 "issuing a packet command, ignoring\n");
965 ireason |= CD;
966 ireason &= ~IO;
967 }
968 }
969 if ((ireason & CD) == 0 || (ireason & IO)) {
970 printk(KERN_ERR "ide-tape: (IO,CoD) != (0,1) while issuing "
971 "a packet command\n");
972 return ide_do_reset(drive);
973 }
974 /* Set the interrupt routine */
975 ide_set_handler(drive, &idetape_pc_intr, IDETAPE_WAIT_CMD, NULL);
976#ifdef CONFIG_BLK_DEV_IDEDMA
977 /* Begin DMA, if necessary */
978 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS)
979 hwif->dma_ops->dma_start(drive);
980#endif
981 /* Send the actual packet */
982 hwif->output_data(drive, NULL, pc->c, 12);
983 847
984 return ide_started; 848 return ide_transfer_pc(drive, tape->pc, idetape_pc_intr,
849 IDETAPE_WAIT_CMD, NULL);
985} 850}
986 851
987static ide_startstop_t idetape_issue_pc(ide_drive_t *drive, 852static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
988 struct ide_atapi_pc *pc) 853 struct ide_atapi_pc *pc)
989{ 854{
990 ide_hwif_t *hwif = drive->hwif;
991 idetape_tape_t *tape = drive->driver_data; 855 idetape_tape_t *tape = drive->driver_data;
992 int dma_ok = 0;
993 u16 bcount;
994 856
995 if (tape->pc->c[0] == REQUEST_SENSE && 857 if (tape->pc->c[0] == REQUEST_SENSE &&
996 pc->c[0] == REQUEST_SENSE) { 858 pc->c[0] == REQUEST_SENSE) {
@@ -1025,50 +887,15 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
1025 pc->error = IDETAPE_ERROR_GENERAL; 887 pc->error = IDETAPE_ERROR_GENERAL;
1026 } 888 }
1027 tape->failed_pc = NULL; 889 tape->failed_pc = NULL;
1028 return pc->idetape_callback(drive); 890 pc->callback(drive);
891 return ide_stopped;
1029 } 892 }
1030 debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]); 893 debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
1031 894
1032 pc->retries++; 895 pc->retries++;
1033 /* We haven't transferred any data yet */
1034 pc->xferred = 0;
1035 pc->cur_pos = pc->buf;
1036 /* Request to transfer the entire buffer at once */
1037 bcount = pc->req_xfer;
1038 896
1039 if (pc->flags & PC_FLAG_DMA_ERROR) { 897 return ide_issue_pc(drive, pc, idetape_transfer_pc,
1040 pc->flags &= ~PC_FLAG_DMA_ERROR; 898 IDETAPE_WAIT_CMD, NULL);
1041 printk(KERN_WARNING "ide-tape: DMA disabled, "
1042 "reverting to PIO\n");
1043 ide_dma_off(drive);
1044 }
1045 if ((pc->flags & PC_FLAG_DMA_RECOMMENDED) && drive->using_dma)
1046 dma_ok = !hwif->dma_ops->dma_setup(drive);
1047
1048 ide_pktcmd_tf_load(drive, IDE_TFLAG_NO_SELECT_MASK |
1049 IDE_TFLAG_OUT_DEVICE, bcount, dma_ok);
1050
1051 if (dma_ok)
1052 /* Will begin DMA later */
1053 pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
1054 if (test_bit(IDETAPE_FLAG_DRQ_INTERRUPT, &tape->flags)) {
1055 ide_execute_command(drive, WIN_PACKETCMD, &idetape_transfer_pc,
1056 IDETAPE_WAIT_CMD, NULL);
1057 return ide_started;
1058 } else {
1059 ide_execute_pkt_cmd(drive);
1060 return idetape_transfer_pc(drive);
1061 }
1062}
1063
1064static ide_startstop_t idetape_pc_callback(ide_drive_t *drive)
1065{
1066 idetape_tape_t *tape = drive->driver_data;
1067
1068 debug_log(DBG_PROCS, "Enter %s\n", __func__);
1069
1070 idetape_end_request(drive, tape->pc->error ? 0 : 1, 0);
1071 return ide_stopped;
1072} 899}
1073 900
1074/* A mode sense command is used to "sense" tape parameters. */ 901/* A mode sense command is used to "sense" tape parameters. */
@@ -1096,7 +923,6 @@ static void idetape_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code)
1096 pc->req_xfer = 24; 923 pc->req_xfer = 24;
1097 else 924 else
1098 pc->req_xfer = 50; 925 pc->req_xfer = 50;
1099 pc->idetape_callback = &idetape_pc_callback;
1100} 926}
1101 927
1102static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive) 928static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
@@ -1114,80 +940,41 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
1114 printk(KERN_ERR "ide-tape: %s: I/O error, ", 940 printk(KERN_ERR "ide-tape: %s: I/O error, ",
1115 tape->name); 941 tape->name);
1116 /* Retry operation */ 942 /* Retry operation */
1117 return idetape_retry_pc(drive); 943 idetape_retry_pc(drive);
944 return ide_stopped;
1118 } 945 }
1119 pc->error = 0; 946 pc->error = 0;
1120 if (tape->failed_pc == pc)
1121 tape->failed_pc = NULL;
1122 } else { 947 } else {
1123 pc->error = IDETAPE_ERROR_GENERAL; 948 pc->error = IDETAPE_ERROR_GENERAL;
1124 tape->failed_pc = NULL; 949 tape->failed_pc = NULL;
1125 } 950 }
1126 return pc->idetape_callback(drive); 951 pc->callback(drive);
1127}
1128
1129static ide_startstop_t idetape_rw_callback(ide_drive_t *drive)
1130{
1131 idetape_tape_t *tape = drive->driver_data;
1132 struct request *rq = HWGROUP(drive)->rq;
1133 int blocks = tape->pc->xferred / tape->blk_size;
1134
1135 tape->avg_size += blocks * tape->blk_size;
1136
1137 if (time_after_eq(jiffies, tape->avg_time + HZ)) {
1138 tape->avg_speed = tape->avg_size * HZ /
1139 (jiffies - tape->avg_time) / 1024;
1140 tape->avg_size = 0;
1141 tape->avg_time = jiffies;
1142 }
1143 debug_log(DBG_PROCS, "Enter %s\n", __func__);
1144
1145 tape->first_frame += blocks;
1146 rq->current_nr_sectors -= blocks;
1147
1148 if (!tape->pc->error)
1149 idetape_end_request(drive, 1, 0);
1150 else
1151 idetape_end_request(drive, tape->pc->error, 0);
1152 return ide_stopped; 952 return ide_stopped;
1153} 953}
1154 954
1155static void idetape_create_read_cmd(idetape_tape_t *tape, 955static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
1156 struct ide_atapi_pc *pc, 956 struct ide_atapi_pc *pc, unsigned int length,
1157 unsigned int length, struct idetape_bh *bh) 957 struct idetape_bh *bh, u8 opcode)
1158{ 958{
1159 idetape_init_pc(pc); 959 idetape_init_pc(pc);
1160 pc->c[0] = READ_6;
1161 put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); 960 put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
1162 pc->c[1] = 1; 961 pc->c[1] = 1;
1163 pc->idetape_callback = &idetape_rw_callback;
1164 pc->bh = bh; 962 pc->bh = bh;
1165 atomic_set(&bh->b_count, 0);
1166 pc->buf = NULL; 963 pc->buf = NULL;
1167 pc->buf_size = length * tape->blk_size; 964 pc->buf_size = length * tape->blk_size;
1168 pc->req_xfer = pc->buf_size; 965 pc->req_xfer = pc->buf_size;
1169 if (pc->req_xfer == tape->buffer_size) 966 if (pc->req_xfer == tape->buffer_size)
1170 pc->flags |= PC_FLAG_DMA_RECOMMENDED; 967 pc->flags |= PC_FLAG_DMA_OK;
1171}
1172 968
1173static void idetape_create_write_cmd(idetape_tape_t *tape, 969 if (opcode == READ_6) {
1174 struct ide_atapi_pc *pc, 970 pc->c[0] = READ_6;
1175 unsigned int length, struct idetape_bh *bh) 971 atomic_set(&bh->b_count, 0);
1176{ 972 } else if (opcode == WRITE_6) {
1177 idetape_init_pc(pc); 973 pc->c[0] = WRITE_6;
1178 pc->c[0] = WRITE_6; 974 pc->flags |= PC_FLAG_WRITING;
1179 put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); 975 pc->b_data = bh->b_data;
1180 pc->c[1] = 1; 976 pc->b_count = atomic_read(&bh->b_count);
1181 pc->idetape_callback = &idetape_rw_callback; 977 }
1182 pc->flags |= PC_FLAG_WRITING;
1183 pc->bh = bh;
1184 pc->b_data = bh->b_data;
1185 pc->b_count = atomic_read(&bh->b_count);
1186 pc->buf = NULL;
1187 pc->buf_size = length * tape->blk_size;
1188 pc->req_xfer = pc->buf_size;
1189 if (pc->req_xfer == tape->buffer_size)
1190 pc->flags |= PC_FLAG_DMA_RECOMMENDED;
1191} 978}
1192 979
1193static ide_startstop_t idetape_do_request(ide_drive_t *drive, 980static ide_startstop_t idetape_do_request(ide_drive_t *drive,
@@ -1211,8 +998,10 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
1211 } 998 }
1212 999
1213 /* Retry a failed packet command */ 1000 /* Retry a failed packet command */
1214 if (tape->failed_pc && tape->pc->c[0] == REQUEST_SENSE) 1001 if (tape->failed_pc && tape->pc->c[0] == REQUEST_SENSE) {
1215 return idetape_issue_pc(drive, tape->failed_pc); 1002 pc = tape->failed_pc;
1003 goto out;
1004 }
1216 1005
1217 if (postponed_rq != NULL) 1006 if (postponed_rq != NULL)
1218 if (rq != postponed_rq) { 1007 if (rq != postponed_rq) {
@@ -1262,14 +1051,16 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
1262 } 1051 }
1263 if (rq->cmd[0] & REQ_IDETAPE_READ) { 1052 if (rq->cmd[0] & REQ_IDETAPE_READ) {
1264 pc = idetape_next_pc_storage(drive); 1053 pc = idetape_next_pc_storage(drive);
1265 idetape_create_read_cmd(tape, pc, rq->current_nr_sectors, 1054 ide_tape_create_rw_cmd(tape, pc, rq->current_nr_sectors,
1266 (struct idetape_bh *)rq->special); 1055 (struct idetape_bh *)rq->special,
1056 READ_6);
1267 goto out; 1057 goto out;
1268 } 1058 }
1269 if (rq->cmd[0] & REQ_IDETAPE_WRITE) { 1059 if (rq->cmd[0] & REQ_IDETAPE_WRITE) {
1270 pc = idetape_next_pc_storage(drive); 1060 pc = idetape_next_pc_storage(drive);
1271 idetape_create_write_cmd(tape, pc, rq->current_nr_sectors, 1061 ide_tape_create_rw_cmd(tape, pc, rq->current_nr_sectors,
1272 (struct idetape_bh *)rq->special); 1062 (struct idetape_bh *)rq->special,
1063 WRITE_6);
1273 goto out; 1064 goto out;
1274 } 1065 }
1275 if (rq->cmd[0] & REQ_IDETAPE_PC1) { 1066 if (rq->cmd[0] & REQ_IDETAPE_PC1) {
@@ -1284,6 +1075,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
1284 } 1075 }
1285 BUG(); 1076 BUG();
1286out: 1077out:
1078 if (test_bit(IDETAPE_FLAG_DRQ_INTERRUPT, &tape->flags))
1079 pc->flags |= PC_FLAG_DRQ_INTERRUPT;
1080
1287 return idetape_issue_pc(drive, pc); 1081 return idetape_issue_pc(drive, pc);
1288} 1082}
1289 1083
@@ -1447,40 +1241,6 @@ static void idetape_init_merge_buffer(idetape_tape_t *tape)
1447 } 1241 }
1448} 1242}
1449 1243
1450static ide_startstop_t idetape_read_position_callback(ide_drive_t *drive)
1451{
1452 idetape_tape_t *tape = drive->driver_data;
1453 u8 *readpos = tape->pc->buf;
1454
1455 debug_log(DBG_PROCS, "Enter %s\n", __func__);
1456
1457 if (!tape->pc->error) {
1458 debug_log(DBG_SENSE, "BOP - %s\n",
1459 (readpos[0] & 0x80) ? "Yes" : "No");
1460 debug_log(DBG_SENSE, "EOP - %s\n",
1461 (readpos[0] & 0x40) ? "Yes" : "No");
1462
1463 if (readpos[0] & 0x4) {
1464 printk(KERN_INFO "ide-tape: Block location is unknown"
1465 "to the tape\n");
1466 clear_bit(IDETAPE_FLAG_ADDRESS_VALID, &tape->flags);
1467 idetape_end_request(drive, 0, 0);
1468 } else {
1469 debug_log(DBG_SENSE, "Block Location - %u\n",
1470 be32_to_cpu(*(u32 *)&readpos[4]));
1471
1472 tape->partition = readpos[1];
1473 tape->first_frame =
1474 be32_to_cpu(*(u32 *)&readpos[4]);
1475 set_bit(IDETAPE_FLAG_ADDRESS_VALID, &tape->flags);
1476 idetape_end_request(drive, 1, 0);
1477 }
1478 } else {
1479 idetape_end_request(drive, 0, 0);
1480 }
1481 return ide_stopped;
1482}
1483
1484/* 1244/*
1485 * Write a filemark if write_filemark=1. Flush the device buffers without 1245 * Write a filemark if write_filemark=1. Flush the device buffers without
1486 * writing a filemark otherwise. 1246 * writing a filemark otherwise.
@@ -1492,14 +1252,12 @@ static void idetape_create_write_filemark_cmd(ide_drive_t *drive,
1492 pc->c[0] = WRITE_FILEMARKS; 1252 pc->c[0] = WRITE_FILEMARKS;
1493 pc->c[4] = write_filemark; 1253 pc->c[4] = write_filemark;
1494 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1254 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1495 pc->idetape_callback = &idetape_pc_callback;
1496} 1255}
1497 1256
1498static void idetape_create_test_unit_ready_cmd(struct ide_atapi_pc *pc) 1257static void idetape_create_test_unit_ready_cmd(struct ide_atapi_pc *pc)
1499{ 1258{
1500 idetape_init_pc(pc); 1259 idetape_init_pc(pc);
1501 pc->c[0] = TEST_UNIT_READY; 1260 pc->c[0] = TEST_UNIT_READY;
1502 pc->idetape_callback = &idetape_pc_callback;
1503} 1261}
1504 1262
1505/* 1263/*
@@ -1518,12 +1276,16 @@ static void idetape_create_test_unit_ready_cmd(struct ide_atapi_pc *pc)
1518static int idetape_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc) 1276static int idetape_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc)
1519{ 1277{
1520 struct ide_tape_obj *tape = drive->driver_data; 1278 struct ide_tape_obj *tape = drive->driver_data;
1521 struct request rq; 1279 struct request *rq;
1280 int error;
1522 1281
1523 idetape_init_rq(&rq, REQ_IDETAPE_PC1); 1282 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
1524 rq.buffer = (char *) pc; 1283 rq->cmd_type = REQ_TYPE_SPECIAL;
1525 rq.rq_disk = tape->disk; 1284 rq->cmd[0] = REQ_IDETAPE_PC1;
1526 return ide_do_drive_cmd(drive, &rq, ide_wait); 1285 rq->buffer = (char *)pc;
1286 error = blk_execute_rq(drive->queue, tape->disk, rq, 0);
1287 blk_put_request(rq);
1288 return error;
1527} 1289}
1528 1290
1529static void idetape_create_load_unload_cmd(ide_drive_t *drive, 1291static void idetape_create_load_unload_cmd(ide_drive_t *drive,
@@ -1533,7 +1295,6 @@ static void idetape_create_load_unload_cmd(ide_drive_t *drive,
1533 pc->c[0] = START_STOP; 1295 pc->c[0] = START_STOP;
1534 pc->c[4] = cmd; 1296 pc->c[4] = cmd;
1535 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1297 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1536 pc->idetape_callback = &idetape_pc_callback;
1537} 1298}
1538 1299
1539static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout) 1300static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout)
@@ -1585,7 +1346,6 @@ static void idetape_create_read_position_cmd(struct ide_atapi_pc *pc)
1585 idetape_init_pc(pc); 1346 idetape_init_pc(pc);
1586 pc->c[0] = READ_POSITION; 1347 pc->c[0] = READ_POSITION;
1587 pc->req_xfer = 20; 1348 pc->req_xfer = 20;
1588 pc->idetape_callback = &idetape_read_position_callback;
1589} 1349}
1590 1350
1591static int idetape_read_position(ide_drive_t *drive) 1351static int idetape_read_position(ide_drive_t *drive)
@@ -1613,7 +1373,6 @@ static void idetape_create_locate_cmd(ide_drive_t *drive,
1613 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[3]); 1373 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[3]);
1614 pc->c[8] = partition; 1374 pc->c[8] = partition;
1615 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1375 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1616 pc->idetape_callback = &idetape_pc_callback;
1617} 1376}
1618 1377
1619static int idetape_create_prevent_cmd(ide_drive_t *drive, 1378static int idetape_create_prevent_cmd(ide_drive_t *drive,
@@ -1628,7 +1387,6 @@ static int idetape_create_prevent_cmd(ide_drive_t *drive,
1628 idetape_init_pc(pc); 1387 idetape_init_pc(pc);
1629 pc->c[0] = ALLOW_MEDIUM_REMOVAL; 1388 pc->c[0] = ALLOW_MEDIUM_REMOVAL;
1630 pc->c[4] = prevent; 1389 pc->c[4] = prevent;
1631 pc->idetape_callback = &idetape_pc_callback;
1632 return 1; 1390 return 1;
1633} 1391}
1634 1392
@@ -1700,26 +1458,33 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
1700 struct idetape_bh *bh) 1458 struct idetape_bh *bh)
1701{ 1459{
1702 idetape_tape_t *tape = drive->driver_data; 1460 idetape_tape_t *tape = drive->driver_data;
1703 struct request rq; 1461 struct request *rq;
1462 int ret, errors;
1704 1463
1705 debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd); 1464 debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
1706 1465
1707 idetape_init_rq(&rq, cmd); 1466 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
1708 rq.rq_disk = tape->disk; 1467 rq->cmd_type = REQ_TYPE_SPECIAL;
1709 rq.special = (void *)bh; 1468 rq->cmd[0] = cmd;
1710 rq.sector = tape->first_frame; 1469 rq->rq_disk = tape->disk;
1711 rq.nr_sectors = blocks; 1470 rq->special = (void *)bh;
1712 rq.current_nr_sectors = blocks; 1471 rq->sector = tape->first_frame;
1713 (void) ide_do_drive_cmd(drive, &rq, ide_wait); 1472 rq->nr_sectors = blocks;
1473 rq->current_nr_sectors = blocks;
1474 blk_execute_rq(drive->queue, tape->disk, rq, 0);
1475
1476 errors = rq->errors;
1477 ret = tape->blk_size * (blocks - rq->current_nr_sectors);
1478 blk_put_request(rq);
1714 1479
1715 if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0) 1480 if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0)
1716 return 0; 1481 return 0;
1717 1482
1718 if (tape->merge_bh) 1483 if (tape->merge_bh)
1719 idetape_init_merge_buffer(tape); 1484 idetape_init_merge_buffer(tape);
1720 if (rq.errors == IDETAPE_ERROR_GENERAL) 1485 if (errors == IDETAPE_ERROR_GENERAL)
1721 return -EIO; 1486 return -EIO;
1722 return (tape->blk_size * (blocks-rq.current_nr_sectors)); 1487 return ret;
1723} 1488}
1724 1489
1725static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc) 1490static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc)
@@ -1728,7 +1493,6 @@ static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc)
1728 pc->c[0] = INQUIRY; 1493 pc->c[0] = INQUIRY;
1729 pc->c[4] = 254; 1494 pc->c[4] = 254;
1730 pc->req_xfer = 254; 1495 pc->req_xfer = 254;
1731 pc->idetape_callback = &idetape_pc_callback;
1732} 1496}
1733 1497
1734static void idetape_create_rewind_cmd(ide_drive_t *drive, 1498static void idetape_create_rewind_cmd(ide_drive_t *drive,
@@ -1737,7 +1501,6 @@ static void idetape_create_rewind_cmd(ide_drive_t *drive,
1737 idetape_init_pc(pc); 1501 idetape_init_pc(pc);
1738 pc->c[0] = REZERO_UNIT; 1502 pc->c[0] = REZERO_UNIT;
1739 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1503 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1740 pc->idetape_callback = &idetape_pc_callback;
1741} 1504}
1742 1505
1743static void idetape_create_erase_cmd(struct ide_atapi_pc *pc) 1506static void idetape_create_erase_cmd(struct ide_atapi_pc *pc)
@@ -1746,7 +1509,6 @@ static void idetape_create_erase_cmd(struct ide_atapi_pc *pc)
1746 pc->c[0] = ERASE; 1509 pc->c[0] = ERASE;
1747 pc->c[1] = 1; 1510 pc->c[1] = 1;
1748 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1511 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1749 pc->idetape_callback = &idetape_pc_callback;
1750} 1512}
1751 1513
1752static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) 1514static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
@@ -1756,7 +1518,6 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
1756 put_unaligned(cpu_to_be32(count), (unsigned int *) &pc->c[1]); 1518 put_unaligned(cpu_to_be32(count), (unsigned int *) &pc->c[1]);
1757 pc->c[1] = cmd; 1519 pc->c[1] = cmd;
1758 pc->flags |= PC_FLAG_WAIT_FOR_DSC; 1520 pc->flags |= PC_FLAG_WAIT_FOR_DSC;
1759 pc->idetape_callback = &idetape_pc_callback;
1760} 1521}
1761 1522
1762/* Queue up a character device originated write request. */ 1523/* Queue up a character device originated write request. */
@@ -2751,9 +2512,8 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
2751 * Ensure that the number we got makes sense; limit it within 2512 * Ensure that the number we got makes sense; limit it within
2752 * IDETAPE_DSC_RW_MIN and IDETAPE_DSC_RW_MAX. 2513 * IDETAPE_DSC_RW_MIN and IDETAPE_DSC_RW_MAX.
2753 */ 2514 */
2754 tape->best_dsc_rw_freq = max_t(unsigned long, 2515 tape->best_dsc_rw_freq = clamp_t(unsigned long, t, IDETAPE_DSC_RW_MIN,
2755 min_t(unsigned long, t, IDETAPE_DSC_RW_MAX), 2516 IDETAPE_DSC_RW_MAX);
2756 IDETAPE_DSC_RW_MIN);
2757 printk(KERN_INFO "ide-tape: %s <-> %s: %dKBps, %d*%dkB buffer, " 2517 printk(KERN_INFO "ide-tape: %s <-> %s: %dKBps, %d*%dkB buffer, "
2758 "%lums tDSC%s\n", 2518 "%lums tDSC%s\n",
2759 drive->name, tape->name, *(u16 *)&tape->caps[14], 2519 drive->name, tape->name, *(u16 *)&tape->caps[14],
@@ -2905,11 +2665,6 @@ static int ide_tape_probe(ide_drive_t *drive)
2905 " the driver\n", drive->name); 2665 " the driver\n", drive->name);
2906 goto failed; 2666 goto failed;
2907 } 2667 }
2908 if (drive->scsi) {
2909 printk(KERN_INFO "ide-tape: passing drive %s to ide-scsi"
2910 " emulation.\n", drive->name);
2911 goto failed;
2912 }
2913 tape = kzalloc(sizeof(idetape_tape_t), GFP_KERNEL); 2668 tape = kzalloc(sizeof(idetape_tape_t), GFP_KERNEL);
2914 if (tape == NULL) { 2669 if (tape == NULL) {
2915 printk(KERN_ERR "ide-tape: %s: Can't allocate a tape struct\n", 2670 printk(KERN_ERR "ide-tape: %s: Can't allocate a tape struct\n",
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index ab545ffa1549..cf55a48a7dd2 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -109,13 +109,15 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
109 109
110 if ((task->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) { 110 if ((task->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) {
111 ide_tf_dump(drive->name, tf); 111 ide_tf_dump(drive->name, tf);
112 ide_set_irq(drive, 1);
113 SELECT_MASK(drive, 0);
112 hwif->tf_load(drive, task); 114 hwif->tf_load(drive, task);
113 } 115 }
114 116
115 switch (task->data_phase) { 117 switch (task->data_phase) {
116 case TASKFILE_MULTI_OUT: 118 case TASKFILE_MULTI_OUT:
117 case TASKFILE_OUT: 119 case TASKFILE_OUT:
118 hwif->OUTBSYNC(drive, tf->command, hwif->io_ports.command_addr); 120 hwif->OUTBSYNC(hwif, tf->command, hwif->io_ports.command_addr);
119 ndelay(400); /* FIXME */ 121 ndelay(400); /* FIXME */
120 return pre_task_out_intr(drive, task->rq); 122 return pre_task_out_intr(drive, task->rq);
121 case TASKFILE_MULTI_IN: 123 case TASKFILE_MULTI_IN:
@@ -492,11 +494,12 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, struct request *rq)
492 494
493int ide_raw_taskfile(ide_drive_t *drive, ide_task_t *task, u8 *buf, u16 nsect) 495int ide_raw_taskfile(ide_drive_t *drive, ide_task_t *task, u8 *buf, u16 nsect)
494{ 496{
495 struct request rq; 497 struct request *rq;
498 int error;
496 499
497 blk_rq_init(NULL, &rq); 500 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
498 rq.cmd_type = REQ_TYPE_ATA_TASKFILE; 501 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
499 rq.buffer = buf; 502 rq->buffer = buf;
500 503
501 /* 504 /*
502 * (ks) We transfer currently only whole sectors. 505 * (ks) We transfer currently only whole sectors.
@@ -504,16 +507,19 @@ int ide_raw_taskfile(ide_drive_t *drive, ide_task_t *task, u8 *buf, u16 nsect)
504 * if we would find a solution to transfer any size. 507 * if we would find a solution to transfer any size.
505 * To support special commands like READ LONG. 508 * To support special commands like READ LONG.
506 */ 509 */
507 rq.hard_nr_sectors = rq.nr_sectors = nsect; 510 rq->hard_nr_sectors = rq->nr_sectors = nsect;
508 rq.hard_cur_sectors = rq.current_nr_sectors = nsect; 511 rq->hard_cur_sectors = rq->current_nr_sectors = nsect;
509 512
510 if (task->tf_flags & IDE_TFLAG_WRITE) 513 if (task->tf_flags & IDE_TFLAG_WRITE)
511 rq.cmd_flags |= REQ_RW; 514 rq->cmd_flags |= REQ_RW;
512 515
513 rq.special = task; 516 rq->special = task;
514 task->rq = &rq; 517 task->rq = rq;
515 518
516 return ide_do_drive_cmd(drive, &rq, ide_wait); 519 error = blk_execute_rq(drive->queue, NULL, rq, 0);
520 blk_put_request(rq);
521
522 return error;
517} 523}
518 524
519EXPORT_SYMBOL(ide_raw_taskfile); 525EXPORT_SYMBOL(ide_raw_taskfile);
@@ -739,12 +745,14 @@ int ide_cmd_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg)
739 struct hd_driveid *id = drive->id; 745 struct hd_driveid *id = drive->id;
740 746
741 if (NULL == (void *) arg) { 747 if (NULL == (void *) arg) {
742 struct request rq; 748 struct request *rq;
743 749
744 ide_init_drive_cmd(&rq); 750 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
745 rq.cmd_type = REQ_TYPE_ATA_TASKFILE; 751 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
752 err = blk_execute_rq(drive->queue, NULL, rq, 0);
753 blk_put_request(rq);
746 754
747 return ide_do_drive_cmd(drive, &rq, ide_wait); 755 return err;
748 } 756 }
749 757
750 if (copy_from_user(args, (void __user *)arg, 4)) 758 if (copy_from_user(args, (void __user *)arg, 4))
diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h
index 3b12ffe77071..2e91c5870b4c 100644
--- a/drivers/ide/ide-timing.h
+++ b/drivers/ide/ide-timing.h
@@ -95,7 +95,6 @@ static struct ide_timing ide_timing[] = {
95#define IDE_TIMING_UDMA 0x80 95#define IDE_TIMING_UDMA 0x80
96#define IDE_TIMING_ALL 0xff 96#define IDE_TIMING_ALL 0xff
97 97
98#define FIT(v,vmin,vmax) max_t(short,min_t(short,v,vmax),vmin)
99#define ENOUGH(v,unit) (((v)-1)/(unit)+1) 98#define ENOUGH(v,unit) (((v)-1)/(unit)+1)
100#define EZ(v,unit) ((v)?ENOUGH(v,unit):0) 99#define EZ(v,unit) ((v)?ENOUGH(v,unit):0)
101 100
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 300431d080a9..2b8453510e09 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -86,13 +86,10 @@ static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR,
86 IDE6_MAJOR, IDE7_MAJOR, 86 IDE6_MAJOR, IDE7_MAJOR,
87 IDE8_MAJOR, IDE9_MAJOR }; 87 IDE8_MAJOR, IDE9_MAJOR };
88 88
89static int idebus_parameter; /* holds the "idebus=" parameter */
90static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */
91
92DEFINE_MUTEX(ide_cfg_mtx); 89DEFINE_MUTEX(ide_cfg_mtx);
93 __cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock);
94 90
95int noautodma = 0; 91__cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock);
92EXPORT_SYMBOL(ide_lock);
96 93
97ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */ 94ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */
98 95
@@ -139,7 +136,6 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
139 drive->media = ide_disk; 136 drive->media = ide_disk;
140 drive->select.all = (unit<<4)|0xa0; 137 drive->select.all = (unit<<4)|0xa0;
141 drive->hwif = hwif; 138 drive->hwif = hwif;
142 drive->ctl = 0x08;
143 drive->ready_stat = READY_STAT; 139 drive->ready_stat = READY_STAT;
144 drive->bad_wstat = BAD_W_STAT; 140 drive->bad_wstat = BAD_W_STAT;
145 drive->special.b.recalibrate = 1; 141 drive->special.b.recalibrate = 1;
@@ -154,32 +150,9 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
154 } 150 }
155} 151}
156 152
157/*
158 * init_ide_data() sets reasonable default values into all fields
159 * of all instances of the hwifs and drives, but only on the first call.
160 * Subsequent calls have no effect (they don't wipe out anything).
161 *
162 * This routine is normally called at driver initialization time,
163 * but may also be called MUCH earlier during kernel "command-line"
164 * parameter processing. As such, we cannot depend on any other parts
165 * of the kernel (such as memory allocation) to be functioning yet.
166 *
167 * This is too bad, as otherwise we could dynamically allocate the
168 * ide_drive_t structs as needed, rather than always consuming memory
169 * for the max possible number (MAX_HWIFS * MAX_DRIVES) of them.
170 *
171 * FIXME: We should stuff the setup data into __init and copy the
172 * relevant hwifs/allocate them properly during boot.
173 */
174#define MAGIC_COOKIE 0x12345678
175static void __init init_ide_data (void) 153static void __init init_ide_data (void)
176{ 154{
177 unsigned int index; 155 unsigned int index;
178 static unsigned long magic_cookie = MAGIC_COOKIE;
179
180 if (magic_cookie != MAGIC_COOKIE)
181 return; /* already initialized */
182 magic_cookie = 0;
183 156
184 /* Initialise all interface structures */ 157 /* Initialise all interface structures */
185 for (index = 0; index < MAX_HWIFS; ++index) { 158 for (index = 0; index < MAX_HWIFS; ++index) {
@@ -189,38 +162,6 @@ static void __init init_ide_data (void)
189 } 162 }
190} 163}
191 164
192/**
193 * ide_system_bus_speed - guess bus speed
194 *
195 * ide_system_bus_speed() returns what we think is the system VESA/PCI
196 * bus speed (in MHz). This is used for calculating interface PIO timings.
197 * The default is 40 for known PCI systems, 50 otherwise.
198 * The "idebus=xx" parameter can be used to override this value.
199 * The actual value to be used is computed/displayed the first time
200 * through. Drivers should only use this as a last resort.
201 *
202 * Returns a guessed speed in MHz.
203 */
204
205static int ide_system_bus_speed(void)
206{
207#ifdef CONFIG_PCI
208 static struct pci_device_id pci_default[] = {
209 { PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID) },
210 { }
211 };
212#else
213#define pci_default 0
214#endif /* CONFIG_PCI */
215
216 /* user supplied value */
217 if (idebus_parameter)
218 return idebus_parameter;
219
220 /* safe default value for PCI or VESA and PCI*/
221 return pci_dev_present(pci_default) ? 33 : 50;
222}
223
224void ide_remove_port_from_hwgroup(ide_hwif_t *hwif) 165void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
225{ 166{
226 ide_hwgroup_t *hwgroup = hwif->hwgroup; 167 ide_hwgroup_t *hwgroup = hwif->hwgroup;
@@ -498,7 +439,7 @@ out:
498 439
499int set_pio_mode(ide_drive_t *drive, int arg) 440int set_pio_mode(ide_drive_t *drive, int arg)
500{ 441{
501 struct request rq; 442 struct request *rq;
502 ide_hwif_t *hwif = drive->hwif; 443 ide_hwif_t *hwif = drive->hwif;
503 const struct ide_port_ops *port_ops = hwif->port_ops; 444 const struct ide_port_ops *port_ops = hwif->port_ops;
504 445
@@ -512,12 +453,15 @@ int set_pio_mode(ide_drive_t *drive, int arg)
512 if (drive->special.b.set_tune) 453 if (drive->special.b.set_tune)
513 return -EBUSY; 454 return -EBUSY;
514 455
515 ide_init_drive_cmd(&rq); 456 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
516 rq.cmd_type = REQ_TYPE_ATA_TASKFILE; 457 rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
517 458
518 drive->tune_req = (u8) arg; 459 drive->tune_req = (u8) arg;
519 drive->special.b.set_tune = 1; 460 drive->special.b.set_tune = 1;
520 (void) ide_do_drive_cmd(drive, &rq, ide_wait); 461
462 blk_execute_rq(drive->queue, NULL, rq, 0);
463 blk_put_request(rq);
464
521 return 0; 465 return 0;
522} 466}
523 467
@@ -537,25 +481,11 @@ static int set_unmaskirq(ide_drive_t *drive, int arg)
537 return 0; 481 return 0;
538} 482}
539 483
540/**
541 * system_bus_clock - clock guess
542 *
543 * External version of the bus clock guess used by very old IDE drivers
544 * for things like VLB timings. Should not be used.
545 */
546
547int system_bus_clock (void)
548{
549 return system_bus_speed;
550}
551
552EXPORT_SYMBOL(system_bus_clock);
553
554static int generic_ide_suspend(struct device *dev, pm_message_t mesg) 484static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
555{ 485{
556 ide_drive_t *drive = dev->driver_data; 486 ide_drive_t *drive = dev->driver_data;
557 ide_hwif_t *hwif = HWIF(drive); 487 ide_hwif_t *hwif = HWIF(drive);
558 struct request rq; 488 struct request *rq;
559 struct request_pm_state rqpm; 489 struct request_pm_state rqpm;
560 ide_task_t args; 490 ide_task_t args;
561 int ret; 491 int ret;
@@ -564,18 +494,19 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
564 if (!(drive->dn % 2)) 494 if (!(drive->dn % 2))
565 ide_acpi_get_timing(hwif); 495 ide_acpi_get_timing(hwif);
566 496
567 blk_rq_init(NULL, &rq);
568 memset(&rqpm, 0, sizeof(rqpm)); 497 memset(&rqpm, 0, sizeof(rqpm));
569 memset(&args, 0, sizeof(args)); 498 memset(&args, 0, sizeof(args));
570 rq.cmd_type = REQ_TYPE_PM_SUSPEND; 499 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
571 rq.special = &args; 500 rq->cmd_type = REQ_TYPE_PM_SUSPEND;
572 rq.data = &rqpm; 501 rq->special = &args;
502 rq->data = &rqpm;
573 rqpm.pm_step = ide_pm_state_start_suspend; 503 rqpm.pm_step = ide_pm_state_start_suspend;
574 if (mesg.event == PM_EVENT_PRETHAW) 504 if (mesg.event == PM_EVENT_PRETHAW)
575 mesg.event = PM_EVENT_FREEZE; 505 mesg.event = PM_EVENT_FREEZE;
576 rqpm.pm_state = mesg.event; 506 rqpm.pm_state = mesg.event;
577 507
578 ret = ide_do_drive_cmd(drive, &rq, ide_wait); 508 ret = blk_execute_rq(drive->queue, NULL, rq, 0);
509 blk_put_request(rq);
579 /* only call ACPI _PS3 after both drivers are suspended */ 510 /* only call ACPI _PS3 after both drivers are suspended */
580 if (!ret && (((drive->dn % 2) && hwif->drives[0].present 511 if (!ret && (((drive->dn % 2) && hwif->drives[0].present
581 && hwif->drives[1].present) 512 && hwif->drives[1].present)
@@ -589,7 +520,7 @@ static int generic_ide_resume(struct device *dev)
589{ 520{
590 ide_drive_t *drive = dev->driver_data; 521 ide_drive_t *drive = dev->driver_data;
591 ide_hwif_t *hwif = HWIF(drive); 522 ide_hwif_t *hwif = HWIF(drive);
592 struct request rq; 523 struct request *rq;
593 struct request_pm_state rqpm; 524 struct request_pm_state rqpm;
594 ide_task_t args; 525 ide_task_t args;
595 int err; 526 int err;
@@ -602,16 +533,18 @@ static int generic_ide_resume(struct device *dev)
602 533
603 ide_acpi_exec_tfs(drive); 534 ide_acpi_exec_tfs(drive);
604 535
605 blk_rq_init(NULL, &rq);
606 memset(&rqpm, 0, sizeof(rqpm)); 536 memset(&rqpm, 0, sizeof(rqpm));
607 memset(&args, 0, sizeof(args)); 537 memset(&args, 0, sizeof(args));
608 rq.cmd_type = REQ_TYPE_PM_RESUME; 538 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
609 rq.special = &args; 539 rq->cmd_type = REQ_TYPE_PM_RESUME;
610 rq.data = &rqpm; 540 rq->cmd_flags |= REQ_PREEMPT;
541 rq->special = &args;
542 rq->data = &rqpm;
611 rqpm.pm_step = ide_pm_state_start_resume; 543 rqpm.pm_step = ide_pm_state_start_resume;
612 rqpm.pm_state = PM_EVENT_ON; 544 rqpm.pm_state = PM_EVENT_ON;
613 545
614 err = ide_do_drive_cmd(drive, &rq, ide_head_wait); 546 err = blk_execute_rq(drive->queue, NULL, rq, 1);
547 blk_put_request(rq);
615 548
616 if (err == 0 && dev->driver) { 549 if (err == 0 && dev->driver) {
617 ide_driver_t *drv = to_ide_driver(dev->driver); 550 ide_driver_t *drv = to_ide_driver(dev->driver);
@@ -764,212 +697,6 @@ set_val:
764 697
765EXPORT_SYMBOL(generic_ide_ioctl); 698EXPORT_SYMBOL(generic_ide_ioctl);
766 699
767/*
768 * stridx() returns the offset of c within s,
769 * or -1 if c is '\0' or not found within s.
770 */
771static int __init stridx (const char *s, char c)
772{
773 char *i = strchr(s, c);
774 return (i && c) ? i - s : -1;
775}
776
777/*
778 * match_parm() does parsing for ide_setup():
779 *
780 * 1. the first char of s must be '='.
781 * 2. if the remainder matches one of the supplied keywords,
782 * the index (1 based) of the keyword is negated and returned.
783 * 3. if the remainder is a series of no more than max_vals numbers
784 * separated by commas, the numbers are saved in vals[] and a
785 * count of how many were saved is returned. Base10 is assumed,
786 * and base16 is allowed when prefixed with "0x".
787 * 4. otherwise, zero is returned.
788 */
789static int __init match_parm (char *s, const char *keywords[], int vals[], int max_vals)
790{
791 static const char *decimal = "0123456789";
792 static const char *hex = "0123456789abcdef";
793 int i, n;
794
795 if (*s++ == '=') {
796 /*
797 * Try matching against the supplied keywords,
798 * and return -(index+1) if we match one
799 */
800 if (keywords != NULL) {
801 for (i = 0; *keywords != NULL; ++i) {
802 if (!strcmp(s, *keywords++))
803 return -(i+1);
804 }
805 }
806 /*
807 * Look for a series of no more than "max_vals"
808 * numeric values separated by commas, in base10,
809 * or base16 when prefixed with "0x".
810 * Return a count of how many were found.
811 */
812 for (n = 0; (i = stridx(decimal, *s)) >= 0;) {
813 vals[n] = i;
814 while ((i = stridx(decimal, *++s)) >= 0)
815 vals[n] = (vals[n] * 10) + i;
816 if (*s == 'x' && !vals[n]) {
817 while ((i = stridx(hex, *++s)) >= 0)
818 vals[n] = (vals[n] * 0x10) + i;
819 }
820 if (++n == max_vals)
821 break;
822 if (*s == ',' || *s == ';')
823 ++s;
824 }
825 if (!*s)
826 return n;
827 }
828 return 0; /* zero = nothing matched */
829}
830
831/*
832 * ide_setup() gets called VERY EARLY during initialization,
833 * to handle kernel "command line" strings beginning with "hdx=" or "ide".
834 *
835 * Remember to update Documentation/ide/ide.txt if you change something here.
836 */
837static int __init ide_setup(char *s)
838{
839 ide_hwif_t *hwif;
840 ide_drive_t *drive;
841 unsigned int hw, unit;
842 int vals[3];
843 const char max_drive = 'a' + ((MAX_HWIFS * MAX_DRIVES) - 1);
844
845 if (strncmp(s,"hd",2) == 0 && s[2] == '=') /* hd= is for hd.c */
846 return 0; /* driver and not us */
847
848 if (strncmp(s,"ide",3) && strncmp(s,"idebus",6) && strncmp(s,"hd",2))
849 return 0;
850
851 printk(KERN_INFO "ide_setup: %s", s);
852 init_ide_data ();
853
854#ifdef CONFIG_BLK_DEV_IDEDOUBLER
855 if (!strcmp(s, "ide=doubler")) {
856 extern int ide_doubler;
857
858 printk(" : Enabled support for IDE doublers\n");
859 ide_doubler = 1;
860 goto obsolete_option;
861 }
862#endif /* CONFIG_BLK_DEV_IDEDOUBLER */
863
864 if (!strcmp(s, "ide=nodma")) {
865 printk(" : Prevented DMA\n");
866 noautodma = 1;
867 goto obsolete_option;
868 }
869
870#ifdef CONFIG_BLK_DEV_IDEACPI
871 if (!strcmp(s, "ide=noacpi")) {
872 //printk(" : Disable IDE ACPI support.\n");
873 ide_noacpi = 1;
874 goto obsolete_option;
875 }
876 if (!strcmp(s, "ide=acpigtf")) {
877 //printk(" : Enable IDE ACPI _GTF support.\n");
878 ide_acpigtf = 1;
879 goto obsolete_option;
880 }
881 if (!strcmp(s, "ide=acpionboot")) {
882 //printk(" : Call IDE ACPI methods on boot.\n");
883 ide_acpionboot = 1;
884 goto obsolete_option;
885 }
886#endif /* CONFIG_BLK_DEV_IDEACPI */
887
888 /*
889 * Look for drive options: "hdx="
890 */
891 if (s[0] == 'h' && s[1] == 'd' && s[2] >= 'a' && s[2] <= max_drive) {
892 const char *hd_words[] = {
893 "none", "noprobe", "nowerr", "cdrom", "nodma",
894 "-6", "-7", "-8", "-9", "-10",
895 "noflush", "remap", "remap63", "scsi", NULL };
896 unit = s[2] - 'a';
897 hw = unit / MAX_DRIVES;
898 unit = unit % MAX_DRIVES;
899 hwif = &ide_hwifs[hw];
900 drive = &hwif->drives[unit];
901 if (strncmp(s + 4, "ide-", 4) == 0) {
902 strlcpy(drive->driver_req, s + 4, sizeof(drive->driver_req));
903 goto obsolete_option;
904 }
905 switch (match_parm(&s[3], hd_words, vals, 3)) {
906 case -1: /* "none" */
907 case -2: /* "noprobe" */
908 drive->noprobe = 1;
909 goto obsolete_option;
910 case -3: /* "nowerr" */
911 drive->bad_wstat = BAD_R_STAT;
912 goto obsolete_option;
913 case -4: /* "cdrom" */
914 drive->present = 1;
915 drive->media = ide_cdrom;
916 /* an ATAPI device ignores DRDY */
917 drive->ready_stat = 0;
918 goto obsolete_option;
919 case -5: /* nodma */
920 drive->nodma = 1;
921 goto obsolete_option;
922 case -11: /* noflush */
923 drive->noflush = 1;
924 goto obsolete_option;
925 case -12: /* "remap" */
926 drive->remap_0_to_1 = 1;
927 goto obsolete_option;
928 case -13: /* "remap63" */
929 drive->sect0 = 63;
930 goto obsolete_option;
931 case -14: /* "scsi" */
932 drive->scsi = 1;
933 goto obsolete_option;
934 case 3: /* cyl,head,sect */
935 drive->media = ide_disk;
936 drive->ready_stat = READY_STAT;
937 drive->cyl = drive->bios_cyl = vals[0];
938 drive->head = drive->bios_head = vals[1];
939 drive->sect = drive->bios_sect = vals[2];
940 drive->present = 1;
941 drive->forced_geom = 1;
942 goto obsolete_option;
943 default:
944 goto bad_option;
945 }
946 }
947
948 if (s[0] != 'i' || s[1] != 'd' || s[2] != 'e')
949 goto bad_option;
950 /*
951 * Look for bus speed option: "idebus="
952 */
953 if (s[3] == 'b' && s[4] == 'u' && s[5] == 's') {
954 if (match_parm(&s[6], NULL, vals, 1) != 1)
955 goto bad_option;
956 if (vals[0] >= 20 && vals[0] <= 66) {
957 idebus_parameter = vals[0];
958 } else
959 printk(" -- BAD BUS SPEED! Expected value from 20 to 66");
960 goto obsolete_option;
961 }
962
963bad_option:
964 printk(" -- BAD OPTION\n");
965 return 1;
966obsolete_option:
967 printk(" -- OBSOLETE OPTION, WILL BE REMOVED SOON!\n");
968 return 1;
969}
970
971EXPORT_SYMBOL(ide_lock);
972
973static int ide_bus_match(struct device *dev, struct device_driver *drv) 700static int ide_bus_match(struct device *dev, struct device_driver *drv)
974{ 701{
975 return 1; 702 return 1;
@@ -1281,11 +1008,6 @@ static int __init ide_init(void)
1281 int ret; 1008 int ret;
1282 1009
1283 printk(KERN_INFO "Uniform Multi-Platform E-IDE driver\n"); 1010 printk(KERN_INFO "Uniform Multi-Platform E-IDE driver\n");
1284 system_bus_speed = ide_system_bus_speed();
1285
1286 printk(KERN_INFO "ide: Assuming %dMHz system bus speed "
1287 "for PIO modes%s\n", system_bus_speed,
1288 idebus_parameter ? "" : "; override with idebus=xx");
1289 1011
1290 ret = bus_register(&ide_bus_type); 1012 ret = bus_register(&ide_bus_type);
1291 if (ret < 0) { 1013 if (ret < 0) {
@@ -1311,32 +1033,7 @@ out_port_class:
1311 return ret; 1033 return ret;
1312} 1034}
1313 1035
1314#ifdef MODULE 1036static void __exit ide_exit(void)
1315static char *options = NULL;
1316module_param(options, charp, 0);
1317MODULE_LICENSE("GPL");
1318
1319static void __init parse_options (char *line)
1320{
1321 char *next = line;
1322
1323 if (line == NULL || !*line)
1324 return;
1325 while ((line = next) != NULL) {
1326 if ((next = strchr(line,' ')) != NULL)
1327 *next++ = 0;
1328 if (!ide_setup(line))
1329 printk (KERN_INFO "Unknown option '%s'\n", line);
1330 }
1331}
1332
1333int __init init_module (void)
1334{
1335 parse_options(options);
1336 return ide_init();
1337}
1338
1339void __exit cleanup_module (void)
1340{ 1037{
1341 proc_ide_destroy(); 1038 proc_ide_destroy();
1342 1039
@@ -1345,10 +1042,7 @@ void __exit cleanup_module (void)
1345 bus_unregister(&ide_bus_type); 1042 bus_unregister(&ide_bus_type);
1346} 1043}
1347 1044
1348#else /* !MODULE */
1349
1350__setup("", ide_setup);
1351
1352module_init(ide_init); 1045module_init(ide_init);
1046module_exit(ide_exit);
1353 1047
1354#endif /* MODULE */ 1048MODULE_LICENSE("GPL");
diff --git a/drivers/ide/legacy/ali14xx.c b/drivers/ide/legacy/ali14xx.c
index 90c65cf97448..052125fafcfa 100644
--- a/drivers/ide/legacy/ali14xx.c
+++ b/drivers/ide/legacy/ali14xx.c
@@ -116,7 +116,7 @@ static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
116 int time1, time2; 116 int time1, time2;
117 u8 param1, param2, param3, param4; 117 u8 param1, param2, param3, param4;
118 unsigned long flags; 118 unsigned long flags;
119 int bus_speed = ide_vlb_clk ? ide_vlb_clk : system_bus_clock(); 119 int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
120 120
121 /* calculate timing, according to PIO mode */ 121 /* calculate timing, according to PIO mode */
122 time1 = ide_pio_cycle_time(drive, pio); 122 time1 = ide_pio_cycle_time(drive, pio);
diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c
index fed7d812761c..b78941680c32 100644
--- a/drivers/ide/legacy/gayle.c
+++ b/drivers/ide/legacy/gayle.c
@@ -64,9 +64,7 @@
64#define GAYLE_HAS_CONTROL_REG (!ide_doubler) 64#define GAYLE_HAS_CONTROL_REG (!ide_doubler)
65#define GAYLE_IDEREG_SIZE (ide_doubler ? 0x1000 : 0x2000) 65#define GAYLE_IDEREG_SIZE (ide_doubler ? 0x1000 : 0x2000)
66 66
67int ide_doubler = 0; /* support IDE doublers? */ 67static int ide_doubler;
68EXPORT_SYMBOL_GPL(ide_doubler);
69
70module_param_named(doubler, ide_doubler, bool, 0); 68module_param_named(doubler, ide_doubler, bool, 0);
71MODULE_PARM_DESC(doubler, "enable support for IDE doublers"); 69MODULE_PARM_DESC(doubler, "enable support for IDE doublers");
72#endif /* CONFIG_BLK_DEV_IDEDOUBLER */ 70#endif /* CONFIG_BLK_DEV_IDEDOUBLER */
diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c
index 4fe516df9f74..dd6dfb32e853 100644
--- a/drivers/ide/legacy/ht6560b.c
+++ b/drivers/ide/legacy/ht6560b.c
@@ -212,7 +212,7 @@ static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio)
212{ 212{
213 int active_time, recovery_time; 213 int active_time, recovery_time;
214 int active_cycles, recovery_cycles; 214 int active_cycles, recovery_cycles;
215 int bus_speed = ide_vlb_clk ? ide_vlb_clk : system_bus_clock(); 215 int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
216 216
217 if (pio) { 217 if (pio) {
218 unsigned int cycle_time; 218 unsigned int cycle_time;
diff --git a/drivers/ide/legacy/qd65xx.c b/drivers/ide/legacy/qd65xx.c
index 6424af154325..51dba82f8812 100644
--- a/drivers/ide/legacy/qd65xx.c
+++ b/drivers/ide/legacy/qd65xx.c
@@ -110,7 +110,7 @@ static void qd65xx_select(ide_drive_t *drive)
110 110
111static u8 qd6500_compute_timing (ide_hwif_t *hwif, int active_time, int recovery_time) 111static u8 qd6500_compute_timing (ide_hwif_t *hwif, int active_time, int recovery_time)
112{ 112{
113 int clk = ide_vlb_clk ? ide_vlb_clk : system_bus_clock(); 113 int clk = ide_vlb_clk ? ide_vlb_clk : 50;
114 u8 act_cyc, rec_cyc; 114 u8 act_cyc, rec_cyc;
115 115
116 if (clk <= 33) { 116 if (clk <= 33) {
@@ -132,7 +132,7 @@ static u8 qd6500_compute_timing (ide_hwif_t *hwif, int active_time, int recovery
132 132
133static u8 qd6580_compute_timing (int active_time, int recovery_time) 133static u8 qd6580_compute_timing (int active_time, int recovery_time)
134{ 134{
135 int clk = ide_vlb_clk ? ide_vlb_clk : system_bus_clock(); 135 int clk = ide_vlb_clk ? ide_vlb_clk : 50;
136 u8 act_cyc, rec_cyc; 136 u8 act_cyc, rec_cyc;
137 137
138 act_cyc = 17 - IDE_IN(active_time * clk / 1000 + 1, 2, 17); 138 act_cyc = 17 - IDE_IN(active_time * clk / 1000 + 1, 2, 17);
diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index 7f46c224b7c4..ae7a4329a581 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -140,7 +140,7 @@ static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
140 140
141static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name) 141static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name)
142{ 142{
143 int bus_speed = ide_pci_clk ? ide_pci_clk : system_bus_clock(); 143 int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
144 144
145 if (bus_speed <= 33) 145 if (bus_speed <= 33)
146 pci_set_drvdata(dev, (void *) aec6xxx_33_base); 146 pci_set_drvdata(dev, (void *) aec6xxx_33_base);
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index f2129d5e07f2..f2de00adf147 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -72,7 +72,7 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
72 int s_time, a_time, c_time; 72 int s_time, a_time, c_time;
73 u8 s_clc, a_clc, r_clc; 73 u8 s_clc, a_clc, r_clc;
74 unsigned long flags; 74 unsigned long flags;
75 int bus_speed = ide_pci_clk ? ide_pci_clk : system_bus_clock(); 75 int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
76 int port = hwif->channel ? 0x5c : 0x58; 76 int port = hwif->channel ? 0x5c : 0x58;
77 int portFIFO = hwif->channel ? 0x55 : 0x54; 77 int portFIFO = hwif->channel ? 0x55 : 0x54;
78 u8 cd_dma_fifo = 0; 78 u8 cd_dma_fifo = 0;
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index efcf54338be7..ad222206a429 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -53,20 +53,20 @@ static void amd_set_speed(struct pci_dev *dev, u8 dn, u8 udma_mask,
53 u8 t = 0, offset = amd_offset(dev); 53 u8 t = 0, offset = amd_offset(dev);
54 54
55 pci_read_config_byte(dev, AMD_ADDRESS_SETUP + offset, &t); 55 pci_read_config_byte(dev, AMD_ADDRESS_SETUP + offset, &t);
56 t = (t & ~(3 << ((3 - dn) << 1))) | ((FIT(timing->setup, 1, 4) - 1) << ((3 - dn) << 1)); 56 t = (t & ~(3 << ((3 - dn) << 1))) | ((clamp_val(timing->setup, 1, 4) - 1) << ((3 - dn) << 1));
57 pci_write_config_byte(dev, AMD_ADDRESS_SETUP + offset, t); 57 pci_write_config_byte(dev, AMD_ADDRESS_SETUP + offset, t);
58 58
59 pci_write_config_byte(dev, AMD_8BIT_TIMING + offset + (1 - (dn >> 1)), 59 pci_write_config_byte(dev, AMD_8BIT_TIMING + offset + (1 - (dn >> 1)),
60 ((FIT(timing->act8b, 1, 16) - 1) << 4) | (FIT(timing->rec8b, 1, 16) - 1)); 60 ((clamp_val(timing->act8b, 1, 16) - 1) << 4) | (clamp_val(timing->rec8b, 1, 16) - 1));
61 61
62 pci_write_config_byte(dev, AMD_DRIVE_TIMING + offset + (3 - dn), 62 pci_write_config_byte(dev, AMD_DRIVE_TIMING + offset + (3 - dn),
63 ((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1)); 63 ((clamp_val(timing->active, 1, 16) - 1) << 4) | (clamp_val(timing->recover, 1, 16) - 1));
64 64
65 switch (udma_mask) { 65 switch (udma_mask) {
66 case ATA_UDMA2: t = timing->udma ? (0xc0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break; 66 case ATA_UDMA2: t = timing->udma ? (0xc0 | (clamp_val(timing->udma, 2, 5) - 2)) : 0x03; break;
67 case ATA_UDMA4: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 2, 10)]) : 0x03; break; 67 case ATA_UDMA4: t = timing->udma ? (0xc0 | amd_cyc2udma[clamp_val(timing->udma, 2, 10)]) : 0x03; break;
68 case ATA_UDMA5: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 10)]) : 0x03; break; 68 case ATA_UDMA5: t = timing->udma ? (0xc0 | amd_cyc2udma[clamp_val(timing->udma, 1, 10)]) : 0x03; break;
69 case ATA_UDMA6: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 15)]) : 0x03; break; 69 case ATA_UDMA6: t = timing->udma ? (0xc0 | amd_cyc2udma[clamp_val(timing->udma, 1, 15)]) : 0x03; break;
70 default: return; 70 default: return;
71 } 71 }
72 72
@@ -179,7 +179,7 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev,
179 * Determine the system bus clock. 179 * Determine the system bus clock.
180 */ 180 */
181 181
182 amd_clock = (ide_pci_clk ? ide_pci_clk : system_bus_clock()) * 1000; 182 amd_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
183 183
184 switch (amd_clock) { 184 switch (amd_clock) {
185 case 33000: amd_clock = 33333; break; 185 case 33000: amd_clock = 33333; break;
diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c
index b38a1980dcd5..cd1ba14984ab 100644
--- a/drivers/ide/pci/cmd640.c
+++ b/drivers/ide/pci/cmd640.c
@@ -525,12 +525,10 @@ static void cmd640_set_mode(ide_drive_t *drive, unsigned int index,
525 u8 setup_count, active_count, recovery_count, recovery_count2, cycle_count; 525 u8 setup_count, active_count, recovery_count, recovery_count2, cycle_count;
526 int bus_speed; 526 int bus_speed;
527 527
528 if (cmd640_vlb && ide_vlb_clk) 528 if (cmd640_vlb)
529 bus_speed = ide_vlb_clk; 529 bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
530 else if (!cmd640_vlb && ide_pci_clk)
531 bus_speed = ide_pci_clk;
532 else 530 else
533 bus_speed = system_bus_clock(); 531 bus_speed = ide_pci_clk ? ide_pci_clk : 33;
534 532
535 if (pio_mode > 5) 533 if (pio_mode > 5)
536 pio_mode = 5; 534 pio_mode = 5;
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index 08674711d089..ca4774aa27ee 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -69,7 +69,7 @@ static u8 quantize_timing(int timing, int quant)
69static void program_cycle_times (ide_drive_t *drive, int cycle_time, int active_time) 69static void program_cycle_times (ide_drive_t *drive, int cycle_time, int active_time)
70{ 70{
71 struct pci_dev *dev = to_pci_dev(drive->hwif->dev); 71 struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
72 int clock_time = 1000 / (ide_pci_clk ? ide_pci_clk : system_bus_clock()); 72 int clock_time = 1000 / (ide_pci_clk ? ide_pci_clk : 33);
73 u8 cycle_count, active_count, recovery_count, drwtim; 73 u8 cycle_count, active_count, recovery_count, drwtim;
74 static const u8 recovery_values[] = 74 static const u8 recovery_values[] =
75 {15, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0}; 75 {15, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0};
@@ -128,7 +128,7 @@ static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
128 ide_pio_timings[pio].active_time); 128 ide_pio_timings[pio].active_time);
129 129
130 setup_count = quantize_timing(ide_pio_timings[pio].setup_time, 130 setup_count = quantize_timing(ide_pio_timings[pio].setup_time,
131 1000 / (ide_pci_clk ? ide_pci_clk : system_bus_clock())); 131 1000 / (ide_pci_clk ? ide_pci_clk : 33));
132 132
133 /* 133 /*
134 * The primary channel has individual address setup timing registers 134 * The primary channel has individual address setup timing registers
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index 77cc22c2ad45..8c534afcb6c8 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -134,7 +134,7 @@ static int calc_clk(int time, int bus_speed)
134static void compute_clocks(u8 pio, pio_clocks_t *p_pclk) 134static void compute_clocks(u8 pio, pio_clocks_t *p_pclk)
135{ 135{
136 int clk1, clk2; 136 int clk1, clk2;
137 int bus_speed = ide_pci_clk ? ide_pci_clk : system_bus_clock(); 137 int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
138 138
139 /* we don't check against CY82C693's min and max speed, 139 /* we don't check against CY82C693's min and max speed,
140 * so you can play with the idebus=xx parameter 140 * so you can play with the idebus=xx parameter
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index c929dadaaaff..397c6cbe953c 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -759,8 +759,7 @@ static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
759 enable_irq (hwif->irq); 759 enable_irq (hwif->irq);
760 } 760 }
761 } else 761 } else
762 outb(mask ? (drive->ctl | 2) : (drive->ctl & ~2), 762 outb(ATA_DEVCTL_OBS | (mask ? 2 : 0), hwif->io_ports.ctl_addr);
763 hwif->io_ports.ctl_addr);
764} 763}
765 764
766/* 765/*
diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index a7a41bb82778..45ba71a7182f 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -76,7 +76,7 @@ static void superio_tf_read(ide_drive_t *drive, ide_task_t *task)
76 } 76 }
77 77
78 /* be sure we're looking at the low order bits */ 78 /* be sure we're looking at the low order bits */
79 outb(drive->ctl & ~0x80, io_ports->ctl_addr); 79 outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr);
80 80
81 if (task->tf_flags & IDE_TFLAG_IN_NSECT) 81 if (task->tf_flags & IDE_TFLAG_IN_NSECT)
82 tf->nsect = inb(io_ports->nsect_addr); 82 tf->nsect = inb(io_ports->nsect_addr);
@@ -90,7 +90,7 @@ static void superio_tf_read(ide_drive_t *drive, ide_task_t *task)
90 tf->device = superio_ide_inb(io_ports->device_addr); 90 tf->device = superio_ide_inb(io_ports->device_addr);
91 91
92 if (task->tf_flags & IDE_TFLAG_LBA48) { 92 if (task->tf_flags & IDE_TFLAG_LBA48) {
93 outb(drive->ctl | 0x80, io_ports->ctl_addr); 93 outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr);
94 94
95 if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) 95 if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE)
96 tf->hob_feature = inb(io_ports->feature_addr); 96 tf->hob_feature = inb(io_ports->feature_addr);
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index 910fb00deb71..1584ebb6a185 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -148,11 +148,8 @@ static void scc_ide_outb(u8 addr, unsigned long port)
148 out_be32((void*)port, addr); 148 out_be32((void*)port, addr);
149} 149}
150 150
151static void 151static void scc_ide_outbsync(ide_hwif_t *hwif, u8 addr, unsigned long port)
152scc_ide_outbsync(ide_drive_t * drive, u8 addr, unsigned long port)
153{ 152{
154 ide_hwif_t *hwif = HWIF(drive);
155
156 out_be32((void*)port, addr); 153 out_be32((void*)port, addr);
157 eieio(); 154 eieio();
158 in_be32((void*)(hwif->dma_base + 0x01c)); 155 in_be32((void*)(hwif->dma_base + 0x01c));
@@ -662,8 +659,6 @@ static void scc_tf_load(ide_drive_t *drive, ide_task_t *task)
662 if (task->tf_flags & IDE_TFLAG_FLAGGED) 659 if (task->tf_flags & IDE_TFLAG_FLAGGED)
663 HIHI = 0xFF; 660 HIHI = 0xFF;
664 661
665 ide_set_irq(drive, 1);
666
667 if (task->tf_flags & IDE_TFLAG_OUT_DATA) 662 if (task->tf_flags & IDE_TFLAG_OUT_DATA)
668 out_be32((void *)io_ports->data_addr, 663 out_be32((void *)io_ports->data_addr,
669 (tf->hob_data << 8) | tf->data); 664 (tf->hob_data << 8) | tf->data);
@@ -708,7 +703,7 @@ static void scc_tf_read(ide_drive_t *drive, ide_task_t *task)
708 } 703 }
709 704
710 /* be sure we're looking at the low order bits */ 705 /* be sure we're looking at the low order bits */
711 scc_ide_outb(drive->ctl & ~0x80, io_ports->ctl_addr); 706 scc_ide_outb(ATA_DEVCTL_OBS & ~0x80, io_ports->ctl_addr);
712 707
713 if (task->tf_flags & IDE_TFLAG_IN_NSECT) 708 if (task->tf_flags & IDE_TFLAG_IN_NSECT)
714 tf->nsect = scc_ide_inb(io_ports->nsect_addr); 709 tf->nsect = scc_ide_inb(io_ports->nsect_addr);
@@ -722,7 +717,7 @@ static void scc_tf_read(ide_drive_t *drive, ide_task_t *task)
722 tf->device = scc_ide_inb(io_ports->device_addr); 717 tf->device = scc_ide_inb(io_ports->device_addr);
723 718
724 if (task->tf_flags & IDE_TFLAG_LBA48) { 719 if (task->tf_flags & IDE_TFLAG_LBA48) {
725 scc_ide_outb(drive->ctl | 0x80, io_ports->ctl_addr); 720 scc_ide_outb(ATA_DEVCTL_OBS | 0x80, io_ports->ctl_addr);
726 721
727 if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE) 722 if (task->tf_flags & IDE_TFLAG_IN_HOB_FEATURE)
728 tf->hob_feature = scc_ide_inb(io_ports->feature_addr); 723 tf->hob_feature = scc_ide_inb(io_ports->feature_addr);
@@ -795,7 +790,6 @@ static void __devinit init_mmio_iops_scc(ide_hwif_t *hwif)
795 790
796 hwif->dma_base = dma_base; 791 hwif->dma_base = dma_base;
797 hwif->config_data = ports->ctl; 792 hwif->config_data = ports->ctl;
798 hwif->mmio = 1;
799} 793}
800 794
801/** 795/**
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index 16a0bce17d69..24513e3dcd6b 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -111,7 +111,7 @@ sgiioc4_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
111static void 111static void
112sgiioc4_maskproc(ide_drive_t * drive, int mask) 112sgiioc4_maskproc(ide_drive_t * drive, int mask)
113{ 113{
114 writeb(mask ? (drive->ctl | 2) : (drive->ctl & ~2), 114 writeb(ATA_DEVCTL_OBS | (mask ? 2 : 0),
115 (void __iomem *)drive->hwif->io_ports.ctl_addr); 115 (void __iomem *)drive->hwif->io_ports.ctl_addr);
116} 116}
117 117
@@ -369,8 +369,7 @@ ide_dma_sgiioc4(ide_hwif_t *hwif, const struct ide_port_info *d)
369 hwif->sg_max_nents = IOC4_PRD_ENTRIES; 369 hwif->sg_max_nents = IOC4_PRD_ENTRIES;
370 370
371 pad = pci_alloc_consistent(dev, IOC4_IDE_CACHELINE_SIZE, 371 pad = pci_alloc_consistent(dev, IOC4_IDE_CACHELINE_SIZE,
372 (dma_addr_t *) &(hwif->dma_status)); 372 (dma_addr_t *)&hwif->extra_base);
373
374 if (pad) { 373 if (pad) {
375 ide_set_hwifdata(hwif, pad); 374 ide_set_hwifdata(hwif, pad);
376 return 0; 375 return 0;
@@ -439,7 +438,7 @@ sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive)
439 438
440 /* Address of the Ending DMA */ 439 /* Address of the Ending DMA */
441 memset(ide_get_hwifdata(hwif), 0, IOC4_IDE_CACHELINE_SIZE); 440 memset(ide_get_hwifdata(hwif), 0, IOC4_IDE_CACHELINE_SIZE);
442 ending_dma_addr = cpu_to_le32(hwif->dma_status); 441 ending_dma_addr = cpu_to_le32(hwif->extra_base);
443 writel(ending_dma_addr, (void __iomem *)(dma_base + IOC4_DMA_END_ADDR * 4)); 442 writel(ending_dma_addr, (void __iomem *)(dma_base + IOC4_DMA_END_ADDR * 4));
444 443
445 writel(dma_direction, (void __iomem *)ioc4_dma_addr); 444 writel(dma_direction, (void __iomem *)ioc4_dma_addr);
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 0006b9e58567..b75e9bb390a7 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -94,7 +94,7 @@ static unsigned long siimage_selreg(ide_hwif_t *hwif, int r)
94 unsigned long base = (unsigned long)hwif->hwif_data; 94 unsigned long base = (unsigned long)hwif->hwif_data;
95 95
96 base += 0xA0 + r; 96 base += 0xA0 + r;
97 if (hwif->mmio) 97 if (hwif->host_flags & IDE_HFLAG_MMIO)
98 base += hwif->channel << 6; 98 base += hwif->channel << 6;
99 else 99 else
100 base += hwif->channel << 4; 100 base += hwif->channel << 4;
@@ -117,7 +117,7 @@ static inline unsigned long siimage_seldev(ide_drive_t *drive, int r)
117 unsigned long base = (unsigned long)hwif->hwif_data; 117 unsigned long base = (unsigned long)hwif->hwif_data;
118 118
119 base += 0xA0 + r; 119 base += 0xA0 + r;
120 if (hwif->mmio) 120 if (hwif->host_flags & IDE_HFLAG_MMIO)
121 base += hwif->channel << 6; 121 base += hwif->channel << 6;
122 else 122 else
123 base += hwif->channel << 4; 123 base += hwif->channel << 4;
@@ -190,7 +190,9 @@ static u8 sil_pata_udma_filter(ide_drive_t *drive)
190 unsigned long base = (unsigned long)hwif->hwif_data; 190 unsigned long base = (unsigned long)hwif->hwif_data;
191 u8 scsc, mask = 0; 191 u8 scsc, mask = 0;
192 192
193 scsc = sil_ioread8(dev, base + (hwif->mmio ? 0x4A : 0x8A)); 193 base += (hwif->host_flags & IDE_HFLAG_MMIO) ? 0x4A : 0x8A;
194
195 scsc = sil_ioread8(dev, base);
194 196
195 switch (scsc & 0x30) { 197 switch (scsc & 0x30) {
196 case 0x10: /* 133 */ 198 case 0x10: /* 133 */
@@ -238,8 +240,9 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
238 unsigned long tfaddr = siimage_selreg(hwif, 0x02); 240 unsigned long tfaddr = siimage_selreg(hwif, 0x02);
239 unsigned long base = (unsigned long)hwif->hwif_data; 241 unsigned long base = (unsigned long)hwif->hwif_data;
240 u8 tf_pio = pio; 242 u8 tf_pio = pio;
241 u8 addr_mask = hwif->channel ? (hwif->mmio ? 0xF4 : 0x84) 243 u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
242 : (hwif->mmio ? 0xB4 : 0x80); 244 u8 addr_mask = hwif->channel ? (mmio ? 0xF4 : 0x84)
245 : (mmio ? 0xB4 : 0x80);
243 u8 mode = 0; 246 u8 mode = 0;
244 u8 unit = drive->select.b.unit; 247 u8 unit = drive->select.b.unit;
245 248
@@ -290,13 +293,13 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed)
290 u16 ultra = 0, multi = 0; 293 u16 ultra = 0, multi = 0;
291 u8 mode = 0, unit = drive->select.b.unit; 294 u8 mode = 0, unit = drive->select.b.unit;
292 unsigned long base = (unsigned long)hwif->hwif_data; 295 unsigned long base = (unsigned long)hwif->hwif_data;
293 u8 scsc = 0, addr_mask = hwif->channel ? 296 u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
294 (hwif->mmio ? 0xF4 : 0x84) : 297 u8 scsc = 0, addr_mask = hwif->channel ? (mmio ? 0xF4 : 0x84)
295 (hwif->mmio ? 0xB4 : 0x80); 298 : (mmio ? 0xB4 : 0x80);
296 unsigned long ma = siimage_seldev(drive, 0x08); 299 unsigned long ma = siimage_seldev(drive, 0x08);
297 unsigned long ua = siimage_seldev(drive, 0x0C); 300 unsigned long ua = siimage_seldev(drive, 0x0C);
298 301
299 scsc = sil_ioread8 (dev, base + (hwif->mmio ? 0x4A : 0x8A)); 302 scsc = sil_ioread8 (dev, base + (mmio ? 0x4A : 0x8A));
300 mode = sil_ioread8 (dev, base + addr_mask); 303 mode = sil_ioread8 (dev, base + addr_mask);
301 multi = sil_ioread16(dev, ma); 304 multi = sil_ioread16(dev, ma);
302 ultra = sil_ioread16(dev, ua); 305 ultra = sil_ioread16(dev, ua);
@@ -391,7 +394,7 @@ static int siimage_mmio_dma_test_irq(ide_drive_t *drive)
391 394
392static int siimage_dma_test_irq(ide_drive_t *drive) 395static int siimage_dma_test_irq(ide_drive_t *drive)
393{ 396{
394 if (drive->hwif->mmio) 397 if (drive->hwif->host_flags & IDE_HFLAG_MMIO)
395 return siimage_mmio_dma_test_irq(drive); 398 return siimage_mmio_dma_test_irq(drive);
396 else 399 else
397 return siimage_io_dma_test_irq(drive); 400 return siimage_io_dma_test_irq(drive);
@@ -640,8 +643,6 @@ static void __devinit init_mmio_iops_siimage(ide_hwif_t *hwif)
640 hwif->irq = dev->irq; 643 hwif->irq = dev->irq;
641 644
642 hwif->dma_base = (unsigned long)addr + (ch ? 0x08 : 0x00); 645 hwif->dma_base = (unsigned long)addr + (ch ? 0x08 : 0x00);
643
644 hwif->mmio = 1;
645} 646}
646 647
647static int is_dev_seagate_sata(ide_drive_t *drive) 648static int is_dev_seagate_sata(ide_drive_t *drive)
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 566e0ecb8db1..3ed9728abd24 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -120,21 +120,21 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
120 120
121 if (~vdev->via_config->flags & VIA_BAD_AST) { 121 if (~vdev->via_config->flags & VIA_BAD_AST) {
122 pci_read_config_byte(dev, VIA_ADDRESS_SETUP, &t); 122 pci_read_config_byte(dev, VIA_ADDRESS_SETUP, &t);
123 t = (t & ~(3 << ((3 - dn) << 1))) | ((FIT(timing->setup, 1, 4) - 1) << ((3 - dn) << 1)); 123 t = (t & ~(3 << ((3 - dn) << 1))) | ((clamp_val(timing->setup, 1, 4) - 1) << ((3 - dn) << 1));
124 pci_write_config_byte(dev, VIA_ADDRESS_SETUP, t); 124 pci_write_config_byte(dev, VIA_ADDRESS_SETUP, t);
125 } 125 }
126 126
127 pci_write_config_byte(dev, VIA_8BIT_TIMING + (1 - (dn >> 1)), 127 pci_write_config_byte(dev, VIA_8BIT_TIMING + (1 - (dn >> 1)),
128 ((FIT(timing->act8b, 1, 16) - 1) << 4) | (FIT(timing->rec8b, 1, 16) - 1)); 128 ((clamp_val(timing->act8b, 1, 16) - 1) << 4) | (clamp_val(timing->rec8b, 1, 16) - 1));
129 129
130 pci_write_config_byte(dev, VIA_DRIVE_TIMING + (3 - dn), 130 pci_write_config_byte(dev, VIA_DRIVE_TIMING + (3 - dn),
131 ((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1)); 131 ((clamp_val(timing->active, 1, 16) - 1) << 4) | (clamp_val(timing->recover, 1, 16) - 1));
132 132
133 switch (vdev->via_config->udma_mask) { 133 switch (vdev->via_config->udma_mask) {
134 case ATA_UDMA2: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break; 134 case ATA_UDMA2: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 5) - 2)) : 0x03; break;
135 case ATA_UDMA4: t = timing->udma ? (0xe8 | (FIT(timing->udma, 2, 9) - 2)) : 0x0f; break; 135 case ATA_UDMA4: t = timing->udma ? (0xe8 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x0f; break;
136 case ATA_UDMA5: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break; 136 case ATA_UDMA5: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x07; break;
137 case ATA_UDMA6: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break; 137 case ATA_UDMA6: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x07; break;
138 default: return; 138 default: return;
139 } 139 }
140 140
@@ -340,7 +340,7 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
340 * Determine system bus clock. 340 * Determine system bus clock.
341 */ 341 */
342 342
343 via_clock = (ide_pci_clk ? ide_pci_clk : system_bus_clock()) * 1000; 343 via_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
344 344
345 switch (via_clock) { 345 switch (via_clock) {
346 case 33000: via_clock = 33333; break; 346 case 33000: via_clock = 33333; break;
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index ba2d58727964..dcb2c466bb97 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -480,13 +480,13 @@ pmac_ide_do_update_timings(ide_drive_t *drive)
480 pmac_ide_selectproc(drive); 480 pmac_ide_selectproc(drive);
481} 481}
482 482
483static void 483static void pmac_outbsync(ide_hwif_t *hwif, u8 value, unsigned long port)
484pmac_outbsync(ide_drive_t *drive, u8 value, unsigned long port)
485{ 484{
486 u32 tmp; 485 u32 tmp;
487 486
488 writeb(value, (void __iomem *) port); 487 writeb(value, (void __iomem *) port);
489 tmp = readl(PMAC_IDE_REG(IDE_TIMING_CONFIG)); 488 tmp = readl((void __iomem *)(hwif->io_ports.data_addr
489 + IDE_TIMING_CONFIG));
490} 490}
491 491
492/* 492/*
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 5171601fb255..abcfb1739d4d 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -87,7 +87,7 @@ unsigned long ide_pci_dma_base(ide_hwif_t *hwif, const struct ide_port_info *d)
87 unsigned long dma_base = 0; 87 unsigned long dma_base = 0;
88 u8 dma_stat = 0; 88 u8 dma_stat = 0;
89 89
90 if (hwif->mmio) 90 if (hwif->host_flags & IDE_HFLAG_MMIO)
91 return hwif->dma_base; 91 return hwif->dma_base;
92 92
93 if (hwif->mate && hwif->mate->dma_base) { 93 if (hwif->mate && hwif->mate->dma_base) {
@@ -374,7 +374,7 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
374 if (base == 0 || ide_pci_set_master(dev, d->name) < 0) 374 if (base == 0 || ide_pci_set_master(dev, d->name) < 0)
375 return -1; 375 return -1;
376 376
377 if (hwif->mmio) 377 if (hwif->host_flags & IDE_HFLAG_MMIO)
378 printk(KERN_INFO " %s: MMIO-DMA\n", hwif->name); 378 printk(KERN_INFO " %s: MMIO-DMA\n", hwif->name);
379 else 379 else
380 printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", 380 printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 781ea5950373..09a2bec7fd32 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -4,28 +4,33 @@
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved.
6 * 6 *
7 * This Software is licensed under one of the following licenses: 7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
8 * 12 *
9 * 1) under the terms of the "Common Public License 1.0" a copy of which is 13 * Redistribution and use in source and binary forms, with or
10 * available from the Open Source Initiative, see 14 * without modification, are permitted provided that the following
11 * http://www.opensource.org/licenses/cpl.php. 15 * conditions are met:
12 * 16 *
13 * 2) under the terms of the "The BSD License" a copy of which is 17 * - Redistributions of source code must retain the above
14 * available from the Open Source Initiative, see 18 * copyright notice, this list of conditions and the following
15 * http://www.opensource.org/licenses/bsd-license.php. 19 * disclaimer.
16 * 20 *
17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a 21 * - Redistributions in binary form must reproduce the above
18 * copy of which is available from the Open Source Initiative, see 22 * copyright notice, this list of conditions and the following
19 * http://www.opensource.org/licenses/gpl-license.php. 23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
20 * 25 *
21 * Licensee has the right to choose one of the above licenses. 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * Redistributions of source code must retain the above copyright 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * notice and one of the license notices. 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
25 * 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
26 * Redistributions in binary form must reproduce both the above copyright 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
27 * notice, one of the license notices in the documentation 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * and/or other materials provided with the distribution. 33 * SOFTWARE.
29 */ 34 */
30 35
31#include <linux/mutex.h> 36#include <linux/mutex.h>
@@ -100,6 +105,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
100 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 105 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
101 if (dst_dev_addr) 106 if (dst_dev_addr)
102 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 107 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
108 dev_addr->src_dev = dev;
103 return 0; 109 return 0;
104} 110}
105EXPORT_SYMBOL(rdma_copy_addr); 111EXPORT_SYMBOL(rdma_copy_addr);
diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h
index fb9ed1489f95..6669287009c2 100644
--- a/drivers/infiniband/core/agent.h
+++ b/drivers/infiniband/core/agent.h
@@ -32,8 +32,6 @@
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 *
36 * $Id: agent.h 1389 2004-12-27 22:56:47Z roland $
37 */ 35 */
38 36
39#ifndef __AGENT_H_ 37#ifndef __AGENT_H_
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index e85f7013de57..68883565b725 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -31,8 +31,6 @@
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE. 33 * SOFTWARE.
34 *
35 * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $
36 */ 34 */
37 35
38#include <linux/module.h> 36#include <linux/module.h>
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index a47fe64e5c39..55738eead3bf 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -31,8 +31,6 @@
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE. 33 * SOFTWARE.
34 *
35 * $Id: cm.c 4311 2005-12-05 18:42:01Z sean.hefty $
36 */ 34 */
37 35
38#include <linux/completion.h> 36#include <linux/completion.h>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 671f13738054..ae11d5cc74d0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4,29 +4,33 @@
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. 5 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
6 * 6 *
7 * This Software is licensed under one of the following licenses: 7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
8 * 12 *
9 * 1) under the terms of the "Common Public License 1.0" a copy of which is 13 * Redistribution and use in source and binary forms, with or
10 * available from the Open Source Initiative, see 14 * without modification, are permitted provided that the following
11 * http://www.opensource.org/licenses/cpl.php. 15 * conditions are met:
12 * 16 *
13 * 2) under the terms of the "The BSD License" a copy of which is 17 * - Redistributions of source code must retain the above
14 * available from the Open Source Initiative, see 18 * copyright notice, this list of conditions and the following
15 * http://www.opensource.org/licenses/bsd-license.php. 19 * disclaimer.
16 * 20 *
17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a 21 * - Redistributions in binary form must reproduce the above
18 * copy of which is available from the Open Source Initiative, see 22 * copyright notice, this list of conditions and the following
19 * http://www.opensource.org/licenses/gpl-license.php. 23 * disclaimer in the documentation and/or other materials
20 * 24 * provided with the distribution.
21 * Licensee has the right to choose one of the above licenses.
22 *
23 * Redistributions of source code must retain the above copyright
24 * notice and one of the license notices.
25 *
26 * Redistributions in binary form must reproduce both the above copyright
27 * notice, one of the license notices in the documentation
28 * and/or other materials provided with the distribution.
29 * 25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
30 */ 34 */
31 35
32#include <linux/completion.h> 36#include <linux/completion.h>
@@ -126,8 +130,7 @@ struct rdma_id_private {
126 130
127 struct completion comp; 131 struct completion comp;
128 atomic_t refcount; 132 atomic_t refcount;
129 wait_queue_head_t wait_remove; 133 struct mutex handler_mutex;
130 atomic_t dev_remove;
131 134
132 int backlog; 135 int backlog;
133 int timeout_ms; 136 int timeout_ms;
@@ -351,26 +354,15 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
351 complete(&id_priv->comp); 354 complete(&id_priv->comp);
352} 355}
353 356
354static int cma_disable_remove(struct rdma_id_private *id_priv, 357static int cma_disable_callback(struct rdma_id_private *id_priv,
355 enum cma_state state) 358 enum cma_state state)
356{ 359{
357 unsigned long flags; 360 mutex_lock(&id_priv->handler_mutex);
358 int ret; 361 if (id_priv->state != state) {
359 362 mutex_unlock(&id_priv->handler_mutex);
360 spin_lock_irqsave(&id_priv->lock, flags); 363 return -EINVAL;
361 if (id_priv->state == state) { 364 }
362 atomic_inc(&id_priv->dev_remove); 365 return 0;
363 ret = 0;
364 } else
365 ret = -EINVAL;
366 spin_unlock_irqrestore(&id_priv->lock, flags);
367 return ret;
368}
369
370static void cma_enable_remove(struct rdma_id_private *id_priv)
371{
372 if (atomic_dec_and_test(&id_priv->dev_remove))
373 wake_up(&id_priv->wait_remove);
374} 366}
375 367
376static int cma_has_cm_dev(struct rdma_id_private *id_priv) 368static int cma_has_cm_dev(struct rdma_id_private *id_priv)
@@ -395,8 +387,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
395 mutex_init(&id_priv->qp_mutex); 387 mutex_init(&id_priv->qp_mutex);
396 init_completion(&id_priv->comp); 388 init_completion(&id_priv->comp);
397 atomic_set(&id_priv->refcount, 1); 389 atomic_set(&id_priv->refcount, 1);
398 init_waitqueue_head(&id_priv->wait_remove); 390 mutex_init(&id_priv->handler_mutex);
399 atomic_set(&id_priv->dev_remove, 0);
400 INIT_LIST_HEAD(&id_priv->listen_list); 391 INIT_LIST_HEAD(&id_priv->listen_list);
401 INIT_LIST_HEAD(&id_priv->mc_list); 392 INIT_LIST_HEAD(&id_priv->mc_list);
402 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 393 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
@@ -923,7 +914,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
923 struct rdma_cm_event event; 914 struct rdma_cm_event event;
924 int ret = 0; 915 int ret = 0;
925 916
926 if (cma_disable_remove(id_priv, CMA_CONNECT)) 917 if (cma_disable_callback(id_priv, CMA_CONNECT))
927 return 0; 918 return 0;
928 919
929 memset(&event, 0, sizeof event); 920 memset(&event, 0, sizeof event);
@@ -970,7 +961,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
970 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 961 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
971 break; 962 break;
972 default: 963 default:
973 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", 964 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
974 ib_event->event); 965 ib_event->event);
975 goto out; 966 goto out;
976 } 967 }
@@ -980,12 +971,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
980 /* Destroy the CM ID by returning a non-zero value. */ 971 /* Destroy the CM ID by returning a non-zero value. */
981 id_priv->cm_id.ib = NULL; 972 id_priv->cm_id.ib = NULL;
982 cma_exch(id_priv, CMA_DESTROYING); 973 cma_exch(id_priv, CMA_DESTROYING);
983 cma_enable_remove(id_priv); 974 mutex_unlock(&id_priv->handler_mutex);
984 rdma_destroy_id(&id_priv->id); 975 rdma_destroy_id(&id_priv->id);
985 return ret; 976 return ret;
986 } 977 }
987out: 978out:
988 cma_enable_remove(id_priv); 979 mutex_unlock(&id_priv->handler_mutex);
989 return ret; 980 return ret;
990} 981}
991 982
@@ -998,6 +989,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
998 union cma_ip_addr *src, *dst; 989 union cma_ip_addr *src, *dst;
999 __be16 port; 990 __be16 port;
1000 u8 ip_ver; 991 u8 ip_ver;
992 int ret;
1001 993
1002 if (cma_get_net_info(ib_event->private_data, listen_id->ps, 994 if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1003 &ip_ver, &port, &src, &dst)) 995 &ip_ver, &port, &src, &dst))
@@ -1022,10 +1014,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1022 if (rt->num_paths == 2) 1014 if (rt->num_paths == 2)
1023 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 1015 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1024 1016
1025 ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1026 ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 1017 ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1027 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 1018 ret = rdma_translate_ip(&id->route.addr.src_addr,
1028 rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA; 1019 &id->route.addr.dev_addr);
1020 if (ret)
1021 goto destroy_id;
1029 1022
1030 id_priv = container_of(id, struct rdma_id_private, id); 1023 id_priv = container_of(id, struct rdma_id_private, id);
1031 id_priv->state = CMA_CONNECT; 1024 id_priv->state = CMA_CONNECT;
@@ -1095,7 +1088,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1095 int offset, ret; 1088 int offset, ret;
1096 1089
1097 listen_id = cm_id->context; 1090 listen_id = cm_id->context;
1098 if (cma_disable_remove(listen_id, CMA_LISTEN)) 1091 if (cma_disable_callback(listen_id, CMA_LISTEN))
1099 return -ECONNABORTED; 1092 return -ECONNABORTED;
1100 1093
1101 memset(&event, 0, sizeof event); 1094 memset(&event, 0, sizeof event);
@@ -1116,7 +1109,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1116 goto out; 1109 goto out;
1117 } 1110 }
1118 1111
1119 atomic_inc(&conn_id->dev_remove); 1112 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1120 mutex_lock(&lock); 1113 mutex_lock(&lock);
1121 ret = cma_acquire_dev(conn_id); 1114 ret = cma_acquire_dev(conn_id);
1122 mutex_unlock(&lock); 1115 mutex_unlock(&lock);
@@ -1138,7 +1131,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1138 !cma_is_ud_ps(conn_id->id.ps)) 1131 !cma_is_ud_ps(conn_id->id.ps))
1139 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1132 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1140 mutex_unlock(&lock); 1133 mutex_unlock(&lock);
1141 cma_enable_remove(conn_id); 1134 mutex_unlock(&conn_id->handler_mutex);
1142 goto out; 1135 goto out;
1143 } 1136 }
1144 1137
@@ -1147,11 +1140,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1147 1140
1148release_conn_id: 1141release_conn_id:
1149 cma_exch(conn_id, CMA_DESTROYING); 1142 cma_exch(conn_id, CMA_DESTROYING);
1150 cma_enable_remove(conn_id); 1143 mutex_unlock(&conn_id->handler_mutex);
1151 rdma_destroy_id(&conn_id->id); 1144 rdma_destroy_id(&conn_id->id);
1152 1145
1153out: 1146out:
1154 cma_enable_remove(listen_id); 1147 mutex_unlock(&listen_id->handler_mutex);
1155 return ret; 1148 return ret;
1156} 1149}
1157 1150
@@ -1217,7 +1210,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1217 struct sockaddr_in *sin; 1210 struct sockaddr_in *sin;
1218 int ret = 0; 1211 int ret = 0;
1219 1212
1220 if (cma_disable_remove(id_priv, CMA_CONNECT)) 1213 if (cma_disable_callback(id_priv, CMA_CONNECT))
1221 return 0; 1214 return 0;
1222 1215
1223 memset(&event, 0, sizeof event); 1216 memset(&event, 0, sizeof event);
@@ -1261,12 +1254,12 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1261 /* Destroy the CM ID by returning a non-zero value. */ 1254 /* Destroy the CM ID by returning a non-zero value. */
1262 id_priv->cm_id.iw = NULL; 1255 id_priv->cm_id.iw = NULL;
1263 cma_exch(id_priv, CMA_DESTROYING); 1256 cma_exch(id_priv, CMA_DESTROYING);
1264 cma_enable_remove(id_priv); 1257 mutex_unlock(&id_priv->handler_mutex);
1265 rdma_destroy_id(&id_priv->id); 1258 rdma_destroy_id(&id_priv->id);
1266 return ret; 1259 return ret;
1267 } 1260 }
1268 1261
1269 cma_enable_remove(id_priv); 1262 mutex_unlock(&id_priv->handler_mutex);
1270 return ret; 1263 return ret;
1271} 1264}
1272 1265
@@ -1282,7 +1275,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1282 struct ib_device_attr attr; 1275 struct ib_device_attr attr;
1283 1276
1284 listen_id = cm_id->context; 1277 listen_id = cm_id->context;
1285 if (cma_disable_remove(listen_id, CMA_LISTEN)) 1278 if (cma_disable_callback(listen_id, CMA_LISTEN))
1286 return -ECONNABORTED; 1279 return -ECONNABORTED;
1287 1280
1288 /* Create a new RDMA id for the new IW CM ID */ 1281 /* Create a new RDMA id for the new IW CM ID */
@@ -1294,19 +1287,19 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1294 goto out; 1287 goto out;
1295 } 1288 }
1296 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 1289 conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1297 atomic_inc(&conn_id->dev_remove); 1290 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1298 conn_id->state = CMA_CONNECT; 1291 conn_id->state = CMA_CONNECT;
1299 1292
1300 dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); 1293 dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
1301 if (!dev) { 1294 if (!dev) {
1302 ret = -EADDRNOTAVAIL; 1295 ret = -EADDRNOTAVAIL;
1303 cma_enable_remove(conn_id); 1296 mutex_unlock(&conn_id->handler_mutex);
1304 rdma_destroy_id(new_cm_id); 1297 rdma_destroy_id(new_cm_id);
1305 goto out; 1298 goto out;
1306 } 1299 }
1307 ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); 1300 ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1308 if (ret) { 1301 if (ret) {
1309 cma_enable_remove(conn_id); 1302 mutex_unlock(&conn_id->handler_mutex);
1310 rdma_destroy_id(new_cm_id); 1303 rdma_destroy_id(new_cm_id);
1311 goto out; 1304 goto out;
1312 } 1305 }
@@ -1315,7 +1308,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1315 ret = cma_acquire_dev(conn_id); 1308 ret = cma_acquire_dev(conn_id);
1316 mutex_unlock(&lock); 1309 mutex_unlock(&lock);
1317 if (ret) { 1310 if (ret) {
1318 cma_enable_remove(conn_id); 1311 mutex_unlock(&conn_id->handler_mutex);
1319 rdma_destroy_id(new_cm_id); 1312 rdma_destroy_id(new_cm_id);
1320 goto out; 1313 goto out;
1321 } 1314 }
@@ -1331,7 +1324,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1331 1324
1332 ret = ib_query_device(conn_id->id.device, &attr); 1325 ret = ib_query_device(conn_id->id.device, &attr);
1333 if (ret) { 1326 if (ret) {
1334 cma_enable_remove(conn_id); 1327 mutex_unlock(&conn_id->handler_mutex);
1335 rdma_destroy_id(new_cm_id); 1328 rdma_destroy_id(new_cm_id);
1336 goto out; 1329 goto out;
1337 } 1330 }
@@ -1347,14 +1340,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1347 /* User wants to destroy the CM ID */ 1340 /* User wants to destroy the CM ID */
1348 conn_id->cm_id.iw = NULL; 1341 conn_id->cm_id.iw = NULL;
1349 cma_exch(conn_id, CMA_DESTROYING); 1342 cma_exch(conn_id, CMA_DESTROYING);
1350 cma_enable_remove(conn_id); 1343 mutex_unlock(&conn_id->handler_mutex);
1351 rdma_destroy_id(&conn_id->id); 1344 rdma_destroy_id(&conn_id->id);
1345 goto out;
1352 } 1346 }
1353 1347
1348 mutex_unlock(&conn_id->handler_mutex);
1349
1354out: 1350out:
1355 if (dev) 1351 if (dev)
1356 dev_put(dev); 1352 dev_put(dev);
1357 cma_enable_remove(listen_id); 1353 mutex_unlock(&listen_id->handler_mutex);
1358 return ret; 1354 return ret;
1359} 1355}
1360 1356
@@ -1446,7 +1442,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1446 ret = rdma_listen(id, id_priv->backlog); 1442 ret = rdma_listen(id, id_priv->backlog);
1447 if (ret) 1443 if (ret)
1448 printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, " 1444 printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
1449 "listening on device %s", ret, cma_dev->device->name); 1445 "listening on device %s\n", ret, cma_dev->device->name);
1450} 1446}
1451 1447
1452static void cma_listen_on_all(struct rdma_id_private *id_priv) 1448static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -1586,7 +1582,7 @@ static void cma_work_handler(struct work_struct *_work)
1586 struct rdma_id_private *id_priv = work->id; 1582 struct rdma_id_private *id_priv = work->id;
1587 int destroy = 0; 1583 int destroy = 0;
1588 1584
1589 atomic_inc(&id_priv->dev_remove); 1585 mutex_lock(&id_priv->handler_mutex);
1590 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 1586 if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1591 goto out; 1587 goto out;
1592 1588
@@ -1595,7 +1591,7 @@ static void cma_work_handler(struct work_struct *_work)
1595 destroy = 1; 1591 destroy = 1;
1596 } 1592 }
1597out: 1593out:
1598 cma_enable_remove(id_priv); 1594 mutex_unlock(&id_priv->handler_mutex);
1599 cma_deref_id(id_priv); 1595 cma_deref_id(id_priv);
1600 if (destroy) 1596 if (destroy)
1601 rdma_destroy_id(&id_priv->id); 1597 rdma_destroy_id(&id_priv->id);
@@ -1758,7 +1754,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
1758 struct rdma_cm_event event; 1754 struct rdma_cm_event event;
1759 1755
1760 memset(&event, 0, sizeof event); 1756 memset(&event, 0, sizeof event);
1761 atomic_inc(&id_priv->dev_remove); 1757 mutex_lock(&id_priv->handler_mutex);
1762 1758
1763 /* 1759 /*
1764 * Grab mutex to block rdma_destroy_id() from removing the device while 1760 * Grab mutex to block rdma_destroy_id() from removing the device while
@@ -1787,13 +1783,13 @@ static void addr_handler(int status, struct sockaddr *src_addr,
1787 1783
1788 if (id_priv->id.event_handler(&id_priv->id, &event)) { 1784 if (id_priv->id.event_handler(&id_priv->id, &event)) {
1789 cma_exch(id_priv, CMA_DESTROYING); 1785 cma_exch(id_priv, CMA_DESTROYING);
1790 cma_enable_remove(id_priv); 1786 mutex_unlock(&id_priv->handler_mutex);
1791 cma_deref_id(id_priv); 1787 cma_deref_id(id_priv);
1792 rdma_destroy_id(&id_priv->id); 1788 rdma_destroy_id(&id_priv->id);
1793 return; 1789 return;
1794 } 1790 }
1795out: 1791out:
1796 cma_enable_remove(id_priv); 1792 mutex_unlock(&id_priv->handler_mutex);
1797 cma_deref_id(id_priv); 1793 cma_deref_id(id_priv);
1798} 1794}
1799 1795
@@ -2120,7 +2116,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2120 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 2116 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2121 int ret = 0; 2117 int ret = 0;
2122 2118
2123 if (cma_disable_remove(id_priv, CMA_CONNECT)) 2119 if (cma_disable_callback(id_priv, CMA_CONNECT))
2124 return 0; 2120 return 0;
2125 2121
2126 memset(&event, 0, sizeof event); 2122 memset(&event, 0, sizeof event);
@@ -2151,7 +2147,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2151 event.status = 0; 2147 event.status = 0;
2152 break; 2148 break;
2153 default: 2149 default:
2154 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d", 2150 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2155 ib_event->event); 2151 ib_event->event);
2156 goto out; 2152 goto out;
2157 } 2153 }
@@ -2161,12 +2157,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2161 /* Destroy the CM ID by returning a non-zero value. */ 2157 /* Destroy the CM ID by returning a non-zero value. */
2162 id_priv->cm_id.ib = NULL; 2158 id_priv->cm_id.ib = NULL;
2163 cma_exch(id_priv, CMA_DESTROYING); 2159 cma_exch(id_priv, CMA_DESTROYING);
2164 cma_enable_remove(id_priv); 2160 mutex_unlock(&id_priv->handler_mutex);
2165 rdma_destroy_id(&id_priv->id); 2161 rdma_destroy_id(&id_priv->id);
2166 return ret; 2162 return ret;
2167 } 2163 }
2168out: 2164out:
2169 cma_enable_remove(id_priv); 2165 mutex_unlock(&id_priv->handler_mutex);
2170 return ret; 2166 return ret;
2171} 2167}
2172 2168
@@ -2564,8 +2560,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2564 int ret; 2560 int ret;
2565 2561
2566 id_priv = mc->id_priv; 2562 id_priv = mc->id_priv;
2567 if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) && 2563 if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
2568 cma_disable_remove(id_priv, CMA_ADDR_RESOLVED)) 2564 cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
2569 return 0; 2565 return 0;
2570 2566
2571 mutex_lock(&id_priv->qp_mutex); 2567 mutex_lock(&id_priv->qp_mutex);
@@ -2590,12 +2586,12 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2590 ret = id_priv->id.event_handler(&id_priv->id, &event); 2586 ret = id_priv->id.event_handler(&id_priv->id, &event);
2591 if (ret) { 2587 if (ret) {
2592 cma_exch(id_priv, CMA_DESTROYING); 2588 cma_exch(id_priv, CMA_DESTROYING);
2593 cma_enable_remove(id_priv); 2589 mutex_unlock(&id_priv->handler_mutex);
2594 rdma_destroy_id(&id_priv->id); 2590 rdma_destroy_id(&id_priv->id);
2595 return 0; 2591 return 0;
2596 } 2592 }
2597 2593
2598 cma_enable_remove(id_priv); 2594 mutex_unlock(&id_priv->handler_mutex);
2599 return 0; 2595 return 0;
2600} 2596}
2601 2597
@@ -2754,6 +2750,7 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
2754{ 2750{
2755 struct rdma_cm_event event; 2751 struct rdma_cm_event event;
2756 enum cma_state state; 2752 enum cma_state state;
2753 int ret = 0;
2757 2754
2758 /* Record that we want to remove the device */ 2755 /* Record that we want to remove the device */
2759 state = cma_exch(id_priv, CMA_DEVICE_REMOVAL); 2756 state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
@@ -2761,15 +2758,18 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
2761 return 0; 2758 return 0;
2762 2759
2763 cma_cancel_operation(id_priv, state); 2760 cma_cancel_operation(id_priv, state);
2764 wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove)); 2761 mutex_lock(&id_priv->handler_mutex);
2765 2762
2766 /* Check for destruction from another callback. */ 2763 /* Check for destruction from another callback. */
2767 if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) 2764 if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
2768 return 0; 2765 goto out;
2769 2766
2770 memset(&event, 0, sizeof event); 2767 memset(&event, 0, sizeof event);
2771 event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; 2768 event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
2772 return id_priv->id.event_handler(&id_priv->id, &event); 2769 ret = id_priv->id.event_handler(&id_priv->id, &event);
2770out:
2771 mutex_unlock(&id_priv->handler_mutex);
2772 return ret;
2773} 2773}
2774 2774
2775static void cma_process_remove(struct cma_device *cma_dev) 2775static void cma_process_remove(struct cma_device *cma_dev)
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 7ad47a4b166b..05ac36e6acdb 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: core_priv.h 1349 2004-12-16 21:09:43Z roland $
33 */ 31 */
34 32
35#ifndef _CORE_PRIV_H 33#ifndef _CORE_PRIV_H
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 5ac5ffee05cb..7913b804311e 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: device.c 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#include <linux/module.h> 34#include <linux/module.h>
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 1286dc1b98b2..4507043d24c8 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: fmr_pool.c 2730 2005-06-28 16:43:03Z sean.hefty $
34 */ 32 */
35 33
36#include <linux/errno.h> 34#include <linux/errno.h>
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 8b75010016ec..05ce331733b0 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $
35 */ 33 */
36 34
37#ifndef __IB_MAD_PRIV_H__ 35#ifndef __IB_MAD_PRIV_H__
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index a5e2a310f312..d0ef7d61c037 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
34 */ 32 */
35 33
36#include "mad_priv.h" 34#include "mad_priv.h"
diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h
index f0616fd22494..3d336bff1148 100644
--- a/drivers/infiniband/core/mad_rmpp.h
+++ b/drivers/infiniband/core/mad_rmpp.h
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: mad_rmpp.h 1921 2005-02-25 22:58:44Z sean.hefty $
33 */ 31 */
34 32
35#ifndef __MAD_RMPP_H__ 33#ifndef __MAD_RMPP_H__
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
index c972d7235764..019bd4b0863e 100644
--- a/drivers/infiniband/core/packer.c
+++ b/drivers/infiniband/core/packer.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#include <linux/string.h> 34#include <linux/string.h>
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index cf474ec27070..1341de793e51 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $
35 */ 33 */
36 34
37#include <linux/module.h> 35#include <linux/module.h>
@@ -361,7 +359,7 @@ static void update_sm_ah(struct work_struct *work)
361{ 359{
362 struct ib_sa_port *port = 360 struct ib_sa_port *port =
363 container_of(work, struct ib_sa_port, update_task); 361 container_of(work, struct ib_sa_port, update_task);
364 struct ib_sa_sm_ah *new_ah, *old_ah; 362 struct ib_sa_sm_ah *new_ah;
365 struct ib_port_attr port_attr; 363 struct ib_port_attr port_attr;
366 struct ib_ah_attr ah_attr; 364 struct ib_ah_attr ah_attr;
367 365
@@ -397,12 +395,9 @@ static void update_sm_ah(struct work_struct *work)
397 } 395 }
398 396
399 spin_lock_irq(&port->ah_lock); 397 spin_lock_irq(&port->ah_lock);
400 old_ah = port->sm_ah;
401 port->sm_ah = new_ah; 398 port->sm_ah = new_ah;
402 spin_unlock_irq(&port->ah_lock); 399 spin_unlock_irq(&port->ah_lock);
403 400
404 if (old_ah)
405 kref_put(&old_ah->ref, free_sm_ah);
406} 401}
407 402
408static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) 403static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
@@ -413,8 +408,17 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
413 event->event == IB_EVENT_PKEY_CHANGE || 408 event->event == IB_EVENT_PKEY_CHANGE ||
414 event->event == IB_EVENT_SM_CHANGE || 409 event->event == IB_EVENT_SM_CHANGE ||
415 event->event == IB_EVENT_CLIENT_REREGISTER) { 410 event->event == IB_EVENT_CLIENT_REREGISTER) {
416 struct ib_sa_device *sa_dev; 411 unsigned long flags;
417 sa_dev = container_of(handler, typeof(*sa_dev), event_handler); 412 struct ib_sa_device *sa_dev =
413 container_of(handler, typeof(*sa_dev), event_handler);
414 struct ib_sa_port *port =
415 &sa_dev->port[event->element.port_num - sa_dev->start_port];
416
417 spin_lock_irqsave(&port->ah_lock, flags);
418 if (port->sm_ah)
419 kref_put(&port->sm_ah->ref, free_sm_ah);
420 port->sm_ah = NULL;
421 spin_unlock_irqrestore(&port->ah_lock, flags);
418 422
419 schedule_work(&sa_dev->port[event->element.port_num - 423 schedule_work(&sa_dev->port[event->element.port_num -
420 sa_dev->start_port].update_task); 424 sa_dev->start_port].update_task);
@@ -519,6 +523,10 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
519 unsigned long flags; 523 unsigned long flags;
520 524
521 spin_lock_irqsave(&query->port->ah_lock, flags); 525 spin_lock_irqsave(&query->port->ah_lock, flags);
526 if (!query->port->sm_ah) {
527 spin_unlock_irqrestore(&query->port->ah_lock, flags);
528 return -EAGAIN;
529 }
522 kref_get(&query->port->sm_ah->ref); 530 kref_get(&query->port->sm_ah->ref);
523 query->sm_ah = query->port->sm_ah; 531 query->sm_ah = query->port->sm_ah;
524 spin_unlock_irqrestore(&query->port->ah_lock, flags); 532 spin_unlock_irqrestore(&query->port->ah_lock, flags);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 95756551cf7c..4d1042115598 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: sysfs.c 1349 2004-12-16 21:09:43Z roland $
35 */ 33 */
36 34
37#include "core_priv.h" 35#include "core_priv.h"
@@ -665,6 +663,120 @@ static struct class ib_class = {
665 .dev_uevent = ib_device_uevent, 663 .dev_uevent = ib_device_uevent,
666}; 664};
667 665
666/* Show a given an attribute in the statistics group */
667static ssize_t show_protocol_stat(const struct device *device,
668 struct device_attribute *attr, char *buf,
669 unsigned offset)
670{
671 struct ib_device *dev = container_of(device, struct ib_device, dev);
672 union rdma_protocol_stats stats;
673 ssize_t ret;
674
675 ret = dev->get_protocol_stats(dev, &stats);
676 if (ret)
677 return ret;
678
679 return sprintf(buf, "%llu\n",
680 (unsigned long long) ((u64 *) &stats)[offset]);
681}
682
683/* generate a read-only iwarp statistics attribute */
684#define IW_STATS_ENTRY(name) \
685static ssize_t show_##name(struct device *device, \
686 struct device_attribute *attr, char *buf) \
687{ \
688 return show_protocol_stat(device, attr, buf, \
689 offsetof(struct iw_protocol_stats, name) / \
690 sizeof (u64)); \
691} \
692static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
693
694IW_STATS_ENTRY(ipInReceives);
695IW_STATS_ENTRY(ipInHdrErrors);
696IW_STATS_ENTRY(ipInTooBigErrors);
697IW_STATS_ENTRY(ipInNoRoutes);
698IW_STATS_ENTRY(ipInAddrErrors);
699IW_STATS_ENTRY(ipInUnknownProtos);
700IW_STATS_ENTRY(ipInTruncatedPkts);
701IW_STATS_ENTRY(ipInDiscards);
702IW_STATS_ENTRY(ipInDelivers);
703IW_STATS_ENTRY(ipOutForwDatagrams);
704IW_STATS_ENTRY(ipOutRequests);
705IW_STATS_ENTRY(ipOutDiscards);
706IW_STATS_ENTRY(ipOutNoRoutes);
707IW_STATS_ENTRY(ipReasmTimeout);
708IW_STATS_ENTRY(ipReasmReqds);
709IW_STATS_ENTRY(ipReasmOKs);
710IW_STATS_ENTRY(ipReasmFails);
711IW_STATS_ENTRY(ipFragOKs);
712IW_STATS_ENTRY(ipFragFails);
713IW_STATS_ENTRY(ipFragCreates);
714IW_STATS_ENTRY(ipInMcastPkts);
715IW_STATS_ENTRY(ipOutMcastPkts);
716IW_STATS_ENTRY(ipInBcastPkts);
717IW_STATS_ENTRY(ipOutBcastPkts);
718IW_STATS_ENTRY(tcpRtoAlgorithm);
719IW_STATS_ENTRY(tcpRtoMin);
720IW_STATS_ENTRY(tcpRtoMax);
721IW_STATS_ENTRY(tcpMaxConn);
722IW_STATS_ENTRY(tcpActiveOpens);
723IW_STATS_ENTRY(tcpPassiveOpens);
724IW_STATS_ENTRY(tcpAttemptFails);
725IW_STATS_ENTRY(tcpEstabResets);
726IW_STATS_ENTRY(tcpCurrEstab);
727IW_STATS_ENTRY(tcpInSegs);
728IW_STATS_ENTRY(tcpOutSegs);
729IW_STATS_ENTRY(tcpRetransSegs);
730IW_STATS_ENTRY(tcpInErrs);
731IW_STATS_ENTRY(tcpOutRsts);
732
733static struct attribute *iw_proto_stats_attrs[] = {
734 &dev_attr_ipInReceives.attr,
735 &dev_attr_ipInHdrErrors.attr,
736 &dev_attr_ipInTooBigErrors.attr,
737 &dev_attr_ipInNoRoutes.attr,
738 &dev_attr_ipInAddrErrors.attr,
739 &dev_attr_ipInUnknownProtos.attr,
740 &dev_attr_ipInTruncatedPkts.attr,
741 &dev_attr_ipInDiscards.attr,
742 &dev_attr_ipInDelivers.attr,
743 &dev_attr_ipOutForwDatagrams.attr,
744 &dev_attr_ipOutRequests.attr,
745 &dev_attr_ipOutDiscards.attr,
746 &dev_attr_ipOutNoRoutes.attr,
747 &dev_attr_ipReasmTimeout.attr,
748 &dev_attr_ipReasmReqds.attr,
749 &dev_attr_ipReasmOKs.attr,
750 &dev_attr_ipReasmFails.attr,
751 &dev_attr_ipFragOKs.attr,
752 &dev_attr_ipFragFails.attr,
753 &dev_attr_ipFragCreates.attr,
754 &dev_attr_ipInMcastPkts.attr,
755 &dev_attr_ipOutMcastPkts.attr,
756 &dev_attr_ipInBcastPkts.attr,
757 &dev_attr_ipOutBcastPkts.attr,
758 &dev_attr_tcpRtoAlgorithm.attr,
759 &dev_attr_tcpRtoMin.attr,
760 &dev_attr_tcpRtoMax.attr,
761 &dev_attr_tcpMaxConn.attr,
762 &dev_attr_tcpActiveOpens.attr,
763 &dev_attr_tcpPassiveOpens.attr,
764 &dev_attr_tcpAttemptFails.attr,
765 &dev_attr_tcpEstabResets.attr,
766 &dev_attr_tcpCurrEstab.attr,
767 &dev_attr_tcpInSegs.attr,
768 &dev_attr_tcpOutSegs.attr,
769 &dev_attr_tcpRetransSegs.attr,
770 &dev_attr_tcpInErrs.attr,
771 &dev_attr_tcpOutRsts.attr,
772 NULL
773};
774
775static struct attribute_group iw_stats_group = {
776 .name = "proto_stats",
777 .attrs = iw_proto_stats_attrs,
778};
779
668int ib_device_register_sysfs(struct ib_device *device) 780int ib_device_register_sysfs(struct ib_device *device)
669{ 781{
670 struct device *class_dev = &device->dev; 782 struct device *class_dev = &device->dev;
@@ -707,6 +819,12 @@ int ib_device_register_sysfs(struct ib_device *device)
707 } 819 }
708 } 820 }
709 821
822 if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) {
823 ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group);
824 if (ret)
825 goto err_put;
826 }
827
710 return 0; 828 return 0;
711 829
712err_put: 830err_put:
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index b25675faaaf5..9494005d1c9a 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: ucm.c 4311 2005-12-05 18:42:01Z sean.hefty $
34 */ 32 */
35 33
36#include <linux/completion.h> 34#include <linux/completion.h>
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 997c07db6d8f..8ec7876bedcf 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: ud_header.c 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#include <linux/errno.h> 34#include <linux/errno.h>
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index a1768dbb0720..6f7c096abf13 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
35 */ 33 */
36 34
37#include <linux/mm.h> 35#include <linux/mm.h>
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 208c7f34323c..268a2d23b7c9 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -31,8 +31,6 @@
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE. 33 * SOFTWARE.
34 *
35 * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
36 */ 34 */
37 35
38#include <linux/module.h> 36#include <linux/module.h>
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 376a57ce1b40..b3ea9587dc80 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -32,8 +32,6 @@
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 *
36 * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $
37 */ 35 */
38 36
39#ifndef UVERBS_H 37#ifndef UVERBS_H
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2c3bff5fe867..56feab6c251e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -31,8 +31,6 @@
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE. 33 * SOFTWARE.
34 *
35 * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
36 */ 34 */
37 35
38#include <linux/file.h> 36#include <linux/file.h>
@@ -919,7 +917,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
919 resp->wc[i].opcode = wc[i].opcode; 917 resp->wc[i].opcode = wc[i].opcode;
920 resp->wc[i].vendor_err = wc[i].vendor_err; 918 resp->wc[i].vendor_err = wc[i].vendor_err;
921 resp->wc[i].byte_len = wc[i].byte_len; 919 resp->wc[i].byte_len = wc[i].byte_len;
922 resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data; 920 resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data;
923 resp->wc[i].qp_num = wc[i].qp->qp_num; 921 resp->wc[i].qp_num = wc[i].qp->qp_num;
924 resp->wc[i].src_qp = wc[i].src_qp; 922 resp->wc[i].src_qp = wc[i].src_qp;
925 resp->wc[i].wc_flags = wc[i].wc_flags; 923 resp->wc[i].wc_flags = wc[i].wc_flags;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 0f34858e31e7..aeee856c4060 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -32,8 +32,6 @@
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 *
36 * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $
37 */ 35 */
38 36
39#include <linux/module.h> 37#include <linux/module.h>
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 05042089de6e..a7da9be43e61 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -34,8 +34,6 @@
34 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 * SOFTWARE. 36 * SOFTWARE.
37 *
38 * $Id: verbs.c 1349 2004-12-16 21:09:43Z roland $
39 */ 37 */
40 38
41#include <linux/errno.h> 39#include <linux/errno.h>
@@ -317,7 +315,6 @@ static const struct {
317} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { 315} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
318 [IB_QPS_RESET] = { 316 [IB_QPS_RESET] = {
319 [IB_QPS_RESET] = { .valid = 1 }, 317 [IB_QPS_RESET] = { .valid = 1 },
320 [IB_QPS_ERR] = { .valid = 1 },
321 [IB_QPS_INIT] = { 318 [IB_QPS_INIT] = {
322 .valid = 1, 319 .valid = 1,
323 .req_param = { 320 .req_param = {
@@ -755,6 +752,52 @@ int ib_dereg_mr(struct ib_mr *mr)
755} 752}
756EXPORT_SYMBOL(ib_dereg_mr); 753EXPORT_SYMBOL(ib_dereg_mr);
757 754
755struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
756{
757 struct ib_mr *mr;
758
759 if (!pd->device->alloc_fast_reg_mr)
760 return ERR_PTR(-ENOSYS);
761
762 mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
763
764 if (!IS_ERR(mr)) {
765 mr->device = pd->device;
766 mr->pd = pd;
767 mr->uobject = NULL;
768 atomic_inc(&pd->usecnt);
769 atomic_set(&mr->usecnt, 0);
770 }
771
772 return mr;
773}
774EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
775
776struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
777 int max_page_list_len)
778{
779 struct ib_fast_reg_page_list *page_list;
780
781 if (!device->alloc_fast_reg_page_list)
782 return ERR_PTR(-ENOSYS);
783
784 page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
785
786 if (!IS_ERR(page_list)) {
787 page_list->device = device;
788 page_list->max_page_list_len = max_page_list_len;
789 }
790
791 return page_list;
792}
793EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
794
795void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
796{
797 page_list->device->free_fast_reg_page_list(page_list);
798}
799EXPORT_SYMBOL(ib_free_fast_reg_page_list);
800
758/* Memory windows */ 801/* Memory windows */
759 802
760struct ib_mw *ib_alloc_mw(struct ib_pd *pd) 803struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index b1441aeb60c2..dd05c4835642 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -454,7 +454,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
454 (IB_DEVICE_RESIZE_MAX_WR | 454 (IB_DEVICE_RESIZE_MAX_WR |
455 IB_DEVICE_CURR_QP_STATE_MOD | 455 IB_DEVICE_CURR_QP_STATE_MOD |
456 IB_DEVICE_SYS_IMAGE_GUID | 456 IB_DEVICE_SYS_IMAGE_GUID |
457 IB_DEVICE_ZERO_STAG | 457 IB_DEVICE_LOCAL_DMA_LKEY |
458 IB_DEVICE_MEM_WINDOW); 458 IB_DEVICE_MEM_WINDOW);
459 459
460 /* Allocate the qptr_array */ 460 /* Allocate the qptr_array */
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 3f441fc57c17..f6d5747153a5 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -145,7 +145,9 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
145 } 145 }
146 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); 146 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
147 memset(wqe, 0, sizeof(*wqe)); 147 memset(wqe, 0, sizeof(*wqe));
148 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7); 148 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD,
149 T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7,
150 T3_SOPEOP);
149 wqe->flags = cpu_to_be32(MODQP_WRITE_EC); 151 wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
150 sge_cmd = qpid << 8 | 3; 152 sge_cmd = qpid << 8 | 3;
151 wqe->sge_cmd = cpu_to_be64(sge_cmd); 153 wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -276,7 +278,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
276 if (!wq->qpid) 278 if (!wq->qpid)
277 return -ENOMEM; 279 return -ENOMEM;
278 280
279 wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL); 281 wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL);
280 if (!wq->rq) 282 if (!wq->rq)
281 goto err1; 283 goto err1;
282 284
@@ -300,6 +302,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
300 if (!kernel_domain) 302 if (!kernel_domain)
301 wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + 303 wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
302 (wq->qpid << rdev_p->qpshift); 304 (wq->qpid << rdev_p->qpshift);
305 wq->rdev = rdev_p;
303 PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__, 306 PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__,
304 wq->qpid, wq->doorbell, (unsigned long long) wq->udb); 307 wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
305 return 0; 308 return 0;
@@ -558,7 +561,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
558 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); 561 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
559 memset(wqe, 0, sizeof(*wqe)); 562 memset(wqe, 0, sizeof(*wqe));
560 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, 563 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
561 T3_CTL_QP_TID, 7); 564 T3_CTL_QP_TID, 7, T3_SOPEOP);
562 wqe->flags = cpu_to_be32(MODQP_WRITE_EC); 565 wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
563 sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; 566 sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
564 wqe->sge_cmd = cpu_to_be64(sge_cmd); 567 wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -674,7 +677,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
674 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, 677 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
675 Q_GENBIT(rdev_p->ctrl_qp.wptr, 678 Q_GENBIT(rdev_p->ctrl_qp.wptr,
676 T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, 679 T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
677 wr_len); 680 wr_len, T3_SOPEOP);
678 if (flag == T3_COMPLETION_FLAG) 681 if (flag == T3_COMPLETION_FLAG)
679 ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); 682 ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
680 len -= 96; 683 len -= 96;
@@ -816,6 +819,13 @@ int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
816 0, 0); 819 0, 0);
817} 820}
818 821
822int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr)
823{
824 *stag = T3_STAG_UNSET;
825 return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
826 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr);
827}
828
819int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) 829int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
820{ 830{
821 struct t3_rdma_init_wr *wqe; 831 struct t3_rdma_init_wr *wqe;
@@ -1257,13 +1267,16 @@ proc_cqe:
1257 wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); 1267 wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
1258 PDBG("%s completing sq idx %ld\n", __func__, 1268 PDBG("%s completing sq idx %ld\n", __func__,
1259 Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); 1269 Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
1260 *cookie = (wq->sq + 1270 *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
1261 Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
1262 wq->sq_rptr++; 1271 wq->sq_rptr++;
1263 } else { 1272 } else {
1264 PDBG("%s completing rq idx %ld\n", __func__, 1273 PDBG("%s completing rq idx %ld\n", __func__,
1265 Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); 1274 Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
1266 *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); 1275 *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
1276 if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
1277 cxio_hal_pblpool_free(wq->rdev,
1278 wq->rq[Q_PTR2IDX(wq->rq_rptr,
1279 wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
1267 wq->rq_rptr++; 1280 wq->rq_rptr++;
1268 } 1281 }
1269 1282
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 6e128f6bab05..656fe47bc84f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -45,15 +45,17 @@
45#define T3_CTRL_QP_SIZE_LOG2 8 45#define T3_CTRL_QP_SIZE_LOG2 8
46#define T3_CTRL_CQ_ID 0 46#define T3_CTRL_CQ_ID 0
47 47
48/* TBD */
49#define T3_MAX_NUM_RI (1<<15) 48#define T3_MAX_NUM_RI (1<<15)
50#define T3_MAX_NUM_QP (1<<15) 49#define T3_MAX_NUM_QP (1<<15)
51#define T3_MAX_NUM_CQ (1<<15) 50#define T3_MAX_NUM_CQ (1<<15)
52#define T3_MAX_NUM_PD (1<<15) 51#define T3_MAX_NUM_PD (1<<15)
53#define T3_MAX_PBL_SIZE 256 52#define T3_MAX_PBL_SIZE 256
54#define T3_MAX_RQ_SIZE 1024 53#define T3_MAX_RQ_SIZE 1024
54#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
55#define T3_MAX_CQ_DEPTH 8192
55#define T3_MAX_NUM_STAG (1<<15) 56#define T3_MAX_NUM_STAG (1<<15)
56#define T3_MAX_MR_SIZE 0x100000000ULL 57#define T3_MAX_MR_SIZE 0x100000000ULL
58#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
57 59
58#define T3_STAG_UNSET 0xffffffff 60#define T3_STAG_UNSET 0xffffffff
59 61
@@ -165,6 +167,7 @@ int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
165int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, 167int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
166 u32 pbl_addr); 168 u32 pbl_addr);
167int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); 169int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
170int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr);
168int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); 171int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
169int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); 172int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
170void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); 173void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index f1a25a821a45..04618f7bfbb3 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -39,6 +39,9 @@
39 39
40#define T3_MAX_SGE 4 40#define T3_MAX_SGE 4
41#define T3_MAX_INLINE 64 41#define T3_MAX_INLINE 64
42#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
43#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
44#define T3_STAG0_PAGE_SHIFT 15
42 45
43#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) 46#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
44#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ 47#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
@@ -72,7 +75,8 @@ enum t3_wr_opcode {
72 T3_WR_BIND = FW_WROPCODE_RI_BIND_MW, 75 T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
73 T3_WR_RCV = FW_WROPCODE_RI_RECEIVE, 76 T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
74 T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, 77 T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
75 T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP 78 T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP,
79 T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR
76} __attribute__ ((packed)); 80} __attribute__ ((packed));
77 81
78enum t3_rdma_opcode { 82enum t3_rdma_opcode {
@@ -89,7 +93,8 @@ enum t3_rdma_opcode {
89 T3_FAST_REGISTER, 93 T3_FAST_REGISTER,
90 T3_LOCAL_INV, 94 T3_LOCAL_INV,
91 T3_QP_MOD, 95 T3_QP_MOD,
92 T3_BYPASS 96 T3_BYPASS,
97 T3_RDMA_READ_REQ_WITH_INV,
93} __attribute__ ((packed)); 98} __attribute__ ((packed));
94 99
95static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) 100static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
@@ -103,6 +108,7 @@ static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
103 case T3_WR_BIND: return T3_BIND_MW; 108 case T3_WR_BIND: return T3_BIND_MW;
104 case T3_WR_INIT: return T3_RDMA_INIT; 109 case T3_WR_INIT: return T3_RDMA_INIT;
105 case T3_WR_QP_MOD: return T3_QP_MOD; 110 case T3_WR_QP_MOD: return T3_QP_MOD;
111 case T3_WR_FASTREG: return T3_FAST_REGISTER;
106 default: break; 112 default: break;
107 } 113 }
108 return -1; 114 return -1;
@@ -170,11 +176,54 @@ struct t3_send_wr {
170 struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ 176 struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
171}; 177};
172 178
179#define T3_MAX_FASTREG_DEPTH 24
180#define T3_MAX_FASTREG_FRAG 10
181
182struct t3_fastreg_wr {
183 struct fw_riwrh wrh; /* 0 */
184 union t3_wrid wrid; /* 1 */
185 __be32 stag; /* 2 */
186 __be32 len;
187 __be32 va_base_hi; /* 3 */
188 __be32 va_base_lo_fbo;
189 __be32 page_type_perms; /* 4 */
190 __be32 reserved1;
191 __be64 pbl_addrs[0]; /* 5+ */
192};
193
194/*
195 * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this.
196 */
197struct t3_pbl_frag {
198 struct fw_riwrh wrh; /* 0 */
199 __be64 pbl_addrs[14]; /* 1..14 */
200};
201
202#define S_FR_PAGE_COUNT 24
203#define M_FR_PAGE_COUNT 0xff
204#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT)
205#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT)
206
207#define S_FR_PAGE_SIZE 16
208#define M_FR_PAGE_SIZE 0x1f
209#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE)
210#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE)
211
212#define S_FR_TYPE 8
213#define M_FR_TYPE 0x1
214#define V_FR_TYPE(x) ((x) << S_FR_TYPE)
215#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE)
216
217#define S_FR_PERMS 0
218#define M_FR_PERMS 0xff
219#define V_FR_PERMS(x) ((x) << S_FR_PERMS)
220#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS)
221
173struct t3_local_inv_wr { 222struct t3_local_inv_wr {
174 struct fw_riwrh wrh; /* 0 */ 223 struct fw_riwrh wrh; /* 0 */
175 union t3_wrid wrid; /* 1 */ 224 union t3_wrid wrid; /* 1 */
176 __be32 stag; /* 2 */ 225 __be32 stag; /* 2 */
177 __be32 reserved3; 226 __be32 reserved;
178}; 227};
179 228
180struct t3_rdma_write_wr { 229struct t3_rdma_write_wr {
@@ -193,7 +242,8 @@ struct t3_rdma_read_wr {
193 struct fw_riwrh wrh; /* 0 */ 242 struct fw_riwrh wrh; /* 0 */
194 union t3_wrid wrid; /* 1 */ 243 union t3_wrid wrid; /* 1 */
195 u8 rdmaop; /* 2 */ 244 u8 rdmaop; /* 2 */
196 u8 reserved[3]; 245 u8 local_inv;
246 u8 reserved[2];
197 __be32 rem_stag; 247 __be32 rem_stag;
198 __be64 rem_to; /* 3 */ 248 __be64 rem_to; /* 3 */
199 __be32 local_stag; /* 4 */ 249 __be32 local_stag; /* 4 */
@@ -201,18 +251,6 @@ struct t3_rdma_read_wr {
201 __be64 local_to; /* 5 */ 251 __be64 local_to; /* 5 */
202}; 252};
203 253
204enum t3_addr_type {
205 T3_VA_BASED_TO = 0x0,
206 T3_ZERO_BASED_TO = 0x1
207} __attribute__ ((packed));
208
209enum t3_mem_perms {
210 T3_MEM_ACCESS_LOCAL_READ = 0x1,
211 T3_MEM_ACCESS_LOCAL_WRITE = 0x2,
212 T3_MEM_ACCESS_REM_READ = 0x4,
213 T3_MEM_ACCESS_REM_WRITE = 0x8
214} __attribute__ ((packed));
215
216struct t3_bind_mw_wr { 254struct t3_bind_mw_wr {
217 struct fw_riwrh wrh; /* 0 */ 255 struct fw_riwrh wrh; /* 0 */
218 union t3_wrid wrid; /* 1 */ 256 union t3_wrid wrid; /* 1 */
@@ -336,6 +374,11 @@ struct t3_genbit {
336 __be64 genbit; 374 __be64 genbit;
337}; 375};
338 376
377struct t3_wq_in_err {
378 u64 flit[13];
379 u64 err;
380};
381
339enum rdma_init_wr_flags { 382enum rdma_init_wr_flags {
340 MPA_INITIATOR = (1<<0), 383 MPA_INITIATOR = (1<<0),
341 PRIV_QP = (1<<1), 384 PRIV_QP = (1<<1),
@@ -346,13 +389,16 @@ union t3_wr {
346 struct t3_rdma_write_wr write; 389 struct t3_rdma_write_wr write;
347 struct t3_rdma_read_wr read; 390 struct t3_rdma_read_wr read;
348 struct t3_receive_wr recv; 391 struct t3_receive_wr recv;
392 struct t3_fastreg_wr fastreg;
393 struct t3_pbl_frag pbl_frag;
349 struct t3_local_inv_wr local_inv; 394 struct t3_local_inv_wr local_inv;
350 struct t3_bind_mw_wr bind; 395 struct t3_bind_mw_wr bind;
351 struct t3_bypass_wr bypass; 396 struct t3_bypass_wr bypass;
352 struct t3_rdma_init_wr init; 397 struct t3_rdma_init_wr init;
353 struct t3_modify_qp_wr qp_mod; 398 struct t3_modify_qp_wr qp_mod;
354 struct t3_genbit genbit; 399 struct t3_genbit genbit;
355 u64 flit[16]; 400 struct t3_wq_in_err wq_in_err;
401 __be64 flit[16];
356}; 402};
357 403
358#define T3_SQ_CQE_FLIT 13 404#define T3_SQ_CQE_FLIT 13
@@ -366,12 +412,18 @@ static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
366 return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); 412 return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
367} 413}
368 414
415enum t3_wr_hdr_bits {
416 T3_EOP = 1,
417 T3_SOP = 2,
418 T3_SOPEOP = T3_EOP|T3_SOP,
419};
420
369static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, 421static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
370 enum t3_wr_flags flags, u8 genbit, u32 tid, 422 enum t3_wr_flags flags, u8 genbit, u32 tid,
371 u8 len) 423 u8 len, u8 sopeop)
372{ 424{
373 wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | 425 wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
374 V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) | 426 V_FW_RIWR_SOPEOP(sopeop) |
375 V_FW_RIWR_FLAGS(flags)); 427 V_FW_RIWR_FLAGS(flags));
376 wmb(); 428 wmb();
377 wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | 429 wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
@@ -404,6 +456,7 @@ enum tpt_addr_type {
404}; 456};
405 457
406enum tpt_mem_perm { 458enum tpt_mem_perm {
459 TPT_MW_BIND = 0x10,
407 TPT_LOCAL_READ = 0x8, 460 TPT_LOCAL_READ = 0x8,
408 TPT_LOCAL_WRITE = 0x4, 461 TPT_LOCAL_WRITE = 0x4,
409 TPT_REMOTE_READ = 0x2, 462 TPT_REMOTE_READ = 0x2,
@@ -615,6 +668,11 @@ struct t3_swsq {
615 int signaled; 668 int signaled;
616}; 669};
617 670
671struct t3_swrq {
672 __u64 wr_id;
673 __u32 pbl_addr;
674};
675
618/* 676/*
619 * A T3 WQ implements both the SQ and RQ. 677 * A T3 WQ implements both the SQ and RQ.
620 */ 678 */
@@ -631,14 +689,15 @@ struct t3_wq {
631 u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ 689 u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
632 u32 sq_rptr; /* pending wrs */ 690 u32 sq_rptr; /* pending wrs */
633 u32 sq_size_log2; /* sq size */ 691 u32 sq_size_log2; /* sq size */
634 u64 *rq; /* SW RQ (holds consumer wr_ids */ 692 struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */
635 u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ 693 u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
636 u32 rq_rptr; /* pending wrs */ 694 u32 rq_rptr; /* pending wrs */
637 u64 *rq_oldest_wr; /* oldest wr on the SW RQ */ 695 struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */
638 u32 rq_size_log2; /* rq size */ 696 u32 rq_size_log2; /* rq size */
639 u32 rq_addr; /* rq adapter address */ 697 u32 rq_addr; /* rq adapter address */
640 void __iomem *doorbell; /* kernel db */ 698 void __iomem *doorbell; /* kernel db */
641 u64 udb; /* user db if any */ 699 u64 udb; /* user db if any */
700 struct cxio_rdev *rdev;
642}; 701};
643 702
644struct t3_cq { 703struct t3_cq {
@@ -659,7 +718,7 @@ struct t3_cq {
659 718
660static inline void cxio_set_wq_in_error(struct t3_wq *wq) 719static inline void cxio_set_wq_in_error(struct t3_wq *wq)
661{ 720{
662 wq->queue->flit[13] = 1; 721 wq->queue->wq_in_err.err = 1;
663} 722}
664 723
665static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) 724static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 71554eacb13c..4489c89d6710 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -71,18 +71,16 @@ static void rnic_init(struct iwch_dev *rnicp)
71 idr_init(&rnicp->mmidr); 71 idr_init(&rnicp->mmidr);
72 spin_lock_init(&rnicp->lock); 72 spin_lock_init(&rnicp->lock);
73 73
74 rnicp->attr.vendor_id = 0x168;
75 rnicp->attr.vendor_part_id = 7;
76 rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; 74 rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
77 rnicp->attr.max_wrs = (1UL << 24) - 1; 75 rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
78 rnicp->attr.max_sge_per_wr = T3_MAX_SGE; 76 rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
79 rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; 77 rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
80 rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; 78 rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
81 rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1; 79 rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
82 rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); 80 rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
83 rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; 81 rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
84 rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; 82 rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
85 rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */ 83 rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
86 rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; 84 rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
87 rnicp->attr.can_resize_wq = 0; 85 rnicp->attr.can_resize_wq = 0;
88 rnicp->attr.max_rdma_reads_per_qp = 8; 86 rnicp->attr.max_rdma_reads_per_qp = 8;
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index d2409a505e8d..3773453b2cf0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -48,8 +48,6 @@ struct iwch_qp;
48struct iwch_mr; 48struct iwch_mr;
49 49
50struct iwch_rnic_attributes { 50struct iwch_rnic_attributes {
51 u32 vendor_id;
52 u32 vendor_part_id;
53 u32 max_qps; 51 u32 max_qps;
54 u32 max_wrs; /* Max for any SQ/RQ */ 52 u32 max_wrs; /* Max for any SQ/RQ */
55 u32 max_sge_per_wr; 53 u32 max_sge_per_wr;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 4ee8ccd0a9e5..cf5474ae68ff 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -81,6 +81,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
81 wc->wr_id = cookie; 81 wc->wr_id = cookie;
82 wc->qp = &qhp->ibqp; 82 wc->qp = &qhp->ibqp;
83 wc->vendor_err = CQE_STATUS(cqe); 83 wc->vendor_err = CQE_STATUS(cqe);
84 wc->wc_flags = 0;
84 85
85 PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " 86 PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
86 "lo 0x%x cookie 0x%llx\n", __func__, 87 "lo 0x%x cookie 0x%llx\n", __func__,
@@ -94,6 +95,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
94 else 95 else
95 wc->byte_len = 0; 96 wc->byte_len = 0;
96 wc->opcode = IB_WC_RECV; 97 wc->opcode = IB_WC_RECV;
98 if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV ||
99 CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) {
100 wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe);
101 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
102 }
97 } else { 103 } else {
98 switch (CQE_OPCODE(cqe)) { 104 switch (CQE_OPCODE(cqe)) {
99 case T3_RDMA_WRITE: 105 case T3_RDMA_WRITE:
@@ -105,17 +111,20 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
105 break; 111 break;
106 case T3_SEND: 112 case T3_SEND:
107 case T3_SEND_WITH_SE: 113 case T3_SEND_WITH_SE:
114 case T3_SEND_WITH_INV:
115 case T3_SEND_WITH_SE_INV:
108 wc->opcode = IB_WC_SEND; 116 wc->opcode = IB_WC_SEND;
109 break; 117 break;
110 case T3_BIND_MW: 118 case T3_BIND_MW:
111 wc->opcode = IB_WC_BIND_MW; 119 wc->opcode = IB_WC_BIND_MW;
112 break; 120 break;
113 121
114 /* these aren't supported yet */
115 case T3_SEND_WITH_INV:
116 case T3_SEND_WITH_SE_INV:
117 case T3_LOCAL_INV: 122 case T3_LOCAL_INV:
123 wc->opcode = IB_WC_LOCAL_INV;
124 break;
118 case T3_FAST_REGISTER: 125 case T3_FAST_REGISTER:
126 wc->opcode = IB_WC_FAST_REG_MR;
127 break;
119 default: 128 default:
120 printk(KERN_ERR MOD "Unexpected opcode %d " 129 printk(KERN_ERR MOD "Unexpected opcode %d "
121 "in the CQE received for QPID=0x%0x\n", 130 "in the CQE received for QPID=0x%0x\n",
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 95f82cfb6c54..b89640aa6e10 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -56,6 +56,7 @@
56#include "iwch_provider.h" 56#include "iwch_provider.h"
57#include "iwch_cm.h" 57#include "iwch_cm.h"
58#include "iwch_user.h" 58#include "iwch_user.h"
59#include "common.h"
59 60
60static int iwch_modify_port(struct ib_device *ibdev, 61static int iwch_modify_port(struct ib_device *ibdev,
61 u8 port, int port_modify_mask, 62 u8 port, int port_modify_mask,
@@ -747,6 +748,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
747 mhp->attr.type = TPT_MW; 748 mhp->attr.type = TPT_MW;
748 mhp->attr.stag = stag; 749 mhp->attr.stag = stag;
749 mmid = (stag) >> 8; 750 mmid = (stag) >> 8;
751 mhp->ibmw.rkey = stag;
750 insert_handle(rhp, &rhp->mmidr, mhp, mmid); 752 insert_handle(rhp, &rhp->mmidr, mhp, mmid);
751 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); 753 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
752 return &(mhp->ibmw); 754 return &(mhp->ibmw);
@@ -768,6 +770,68 @@ static int iwch_dealloc_mw(struct ib_mw *mw)
768 return 0; 770 return 0;
769} 771}
770 772
773static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
774{
775 struct iwch_dev *rhp;
776 struct iwch_pd *php;
777 struct iwch_mr *mhp;
778 u32 mmid;
779 u32 stag = 0;
780 int ret;
781
782 php = to_iwch_pd(pd);
783 rhp = php->rhp;
784 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
785 if (!mhp)
786 return ERR_PTR(-ENOMEM);
787
788 mhp->rhp = rhp;
789 ret = iwch_alloc_pbl(mhp, pbl_depth);
790 if (ret) {
791 kfree(mhp);
792 return ERR_PTR(ret);
793 }
794 mhp->attr.pbl_size = pbl_depth;
795 ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
796 mhp->attr.pbl_size, mhp->attr.pbl_addr);
797 if (ret) {
798 iwch_free_pbl(mhp);
799 kfree(mhp);
800 return ERR_PTR(ret);
801 }
802 mhp->attr.pdid = php->pdid;
803 mhp->attr.type = TPT_NON_SHARED_MR;
804 mhp->attr.stag = stag;
805 mhp->attr.state = 1;
806 mmid = (stag) >> 8;
807 mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
808 insert_handle(rhp, &rhp->mmidr, mhp, mmid);
809 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
810 return &(mhp->ibmr);
811}
812
813static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
814 struct ib_device *device,
815 int page_list_len)
816{
817 struct ib_fast_reg_page_list *page_list;
818
819 page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64),
820 GFP_KERNEL);
821 if (!page_list)
822 return ERR_PTR(-ENOMEM);
823
824 page_list->page_list = (u64 *)(page_list + 1);
825 page_list->max_page_list_len = page_list_len;
826
827 return page_list;
828}
829
830static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list)
831{
832 kfree(page_list);
833}
834
771static int iwch_destroy_qp(struct ib_qp *ib_qp) 835static int iwch_destroy_qp(struct ib_qp *ib_qp)
772{ 836{
773 struct iwch_dev *rhp; 837 struct iwch_dev *rhp;
@@ -843,6 +907,15 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
843 */ 907 */
844 sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); 908 sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
845 wqsize = roundup_pow_of_two(rqsize + sqsize); 909 wqsize = roundup_pow_of_two(rqsize + sqsize);
910
911 /*
912 * Kernel users need more wq space for fastreg WRs which can take
913 * 2 WR fragments.
914 */
915 ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
916 if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
917 wqsize = roundup_pow_of_two(rqsize +
918 roundup_pow_of_two(attrs->cap.max_send_wr * 2));
846 PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__, 919 PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__,
847 wqsize, sqsize, rqsize); 920 wqsize, sqsize, rqsize);
848 qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); 921 qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
@@ -851,7 +924,6 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
851 qhp->wq.size_log2 = ilog2(wqsize); 924 qhp->wq.size_log2 = ilog2(wqsize);
852 qhp->wq.rq_size_log2 = ilog2(rqsize); 925 qhp->wq.rq_size_log2 = ilog2(rqsize);
853 qhp->wq.sq_size_log2 = ilog2(sqsize); 926 qhp->wq.sq_size_log2 = ilog2(sqsize);
854 ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
855 if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, 927 if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
856 ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { 928 ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
857 kfree(qhp); 929 kfree(qhp);
@@ -935,10 +1007,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
935 qhp->ibqp.qp_num = qhp->wq.qpid; 1007 qhp->ibqp.qp_num = qhp->wq.qpid;
936 init_timer(&(qhp->timer)); 1008 init_timer(&(qhp->timer));
937 PDBG("%s sq_num_entries %d, rq_num_entries %d " 1009 PDBG("%s sq_num_entries %d, rq_num_entries %d "
938 "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n", 1010 "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
939 __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, 1011 __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
940 qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, 1012 qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
941 1 << qhp->wq.size_log2); 1013 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
942 return &qhp->ibqp; 1014 return &qhp->ibqp;
943} 1015}
944 1016
@@ -1023,6 +1095,29 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
1023 return 0; 1095 return 0;
1024} 1096}
1025 1097
1098static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
1099{
1100 struct ethtool_drvinfo info;
1101 struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
1102 char *cp, *next;
1103 unsigned fw_maj, fw_min, fw_mic;
1104
1105 rtnl_lock();
1106 lldev->ethtool_ops->get_drvinfo(lldev, &info);
1107 rtnl_unlock();
1108
1109 next = info.fw_version + 1;
1110 cp = strsep(&next, ".");
1111 sscanf(cp, "%i", &fw_maj);
1112 cp = strsep(&next, ".");
1113 sscanf(cp, "%i", &fw_min);
1114 cp = strsep(&next, ".");
1115 sscanf(cp, "%i", &fw_mic);
1116
1117 return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) |
1118 (fw_mic & 0xffff);
1119}
1120
1026static int iwch_query_device(struct ib_device *ibdev, 1121static int iwch_query_device(struct ib_device *ibdev,
1027 struct ib_device_attr *props) 1122 struct ib_device_attr *props)
1028{ 1123{
@@ -1033,7 +1128,10 @@ static int iwch_query_device(struct ib_device *ibdev,
1033 dev = to_iwch_dev(ibdev); 1128 dev = to_iwch_dev(ibdev);
1034 memset(props, 0, sizeof *props); 1129 memset(props, 0, sizeof *props);
1035 memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); 1130 memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
1131 props->hw_ver = dev->rdev.t3cdev_p->type;
1132 props->fw_ver = fw_vers_string_to_u64(dev);
1036 props->device_cap_flags = dev->device_cap_flags; 1133 props->device_cap_flags = dev->device_cap_flags;
1134 props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
1037 props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; 1135 props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
1038 props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; 1136 props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
1039 props->max_mr_size = dev->attr.max_mr_size; 1137 props->max_mr_size = dev->attr.max_mr_size;
@@ -1048,6 +1146,7 @@ static int iwch_query_device(struct ib_device *ibdev,
1048 props->max_mr = dev->attr.max_mem_regs; 1146 props->max_mr = dev->attr.max_mem_regs;
1049 props->max_pd = dev->attr.max_pds; 1147 props->max_pd = dev->attr.max_pds;
1050 props->local_ca_ack_delay = 0; 1148 props->local_ca_ack_delay = 0;
1149 props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH;
1051 1150
1052 return 0; 1151 return 0;
1053} 1152}
@@ -1088,6 +1187,28 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
1088 return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); 1187 return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
1089} 1188}
1090 1189
1190static int fw_supports_fastreg(struct iwch_dev *iwch_dev)
1191{
1192 struct ethtool_drvinfo info;
1193 struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
1194 char *cp, *next;
1195 unsigned fw_maj, fw_min;
1196
1197 rtnl_lock();
1198 lldev->ethtool_ops->get_drvinfo(lldev, &info);
1199 rtnl_unlock();
1200
1201 next = info.fw_version+1;
1202 cp = strsep(&next, ".");
1203 sscanf(cp, "%i", &fw_maj);
1204 cp = strsep(&next, ".");
1205 sscanf(cp, "%i", &fw_min);
1206
1207 PDBG("%s maj %u min %u\n", __func__, fw_maj, fw_min);
1208
1209 return fw_maj > 6 || (fw_maj == 6 && fw_min > 0);
1210}
1211
1091static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) 1212static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf)
1092{ 1213{
1093 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, 1214 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
@@ -1127,6 +1248,61 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
1127 iwch_dev->rdev.rnic_info.pdev->device); 1248 iwch_dev->rdev.rnic_info.pdev->device);
1128} 1249}
1129 1250
1251static int iwch_get_mib(struct ib_device *ibdev,
1252 union rdma_protocol_stats *stats)
1253{
1254 struct iwch_dev *dev;
1255 struct tp_mib_stats m;
1256 int ret;
1257
1258 PDBG("%s ibdev %p\n", __func__, ibdev);
1259 dev = to_iwch_dev(ibdev);
1260 ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
1261 if (ret)
1262 return -ENOSYS;
1263
1264 memset(stats, 0, sizeof *stats);
1265 stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) +
1266 m.ipInReceive_lo;
1267 stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) +
1268 m.ipInHdrErrors_lo;
1269 stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) +
1270 m.ipInAddrErrors_lo;
1271 stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) +
1272 m.ipInUnknownProtos_lo;
1273 stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) +
1274 m.ipInDiscards_lo;
1275 stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) +
1276 m.ipInDelivers_lo;
1277 stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) +
1278 m.ipOutRequests_lo;
1279 stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) +
1280 m.ipOutDiscards_lo;
1281 stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) +
1282 m.ipOutNoRoutes_lo;
1283 stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout;
1284 stats->iw.ipReasmReqds = (u64) m.ipReasmReqds;
1285 stats->iw.ipReasmOKs = (u64) m.ipReasmOKs;
1286 stats->iw.ipReasmFails = (u64) m.ipReasmFails;
1287 stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens;
1288 stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens;
1289 stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails;
1290 stats->iw.tcpEstabResets = (u64) m.tcpEstabResets;
1291 stats->iw.tcpOutRsts = (u64) m.tcpOutRsts;
1292 stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab;
1293 stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) +
1294 m.tcpInSegs_lo;
1295 stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) +
1296 m.tcpOutSegs_lo;
1297 stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) +
1298 m.tcpRetransSeg_lo;
1299 stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) +
1300 m.tcpInErrs_lo;
1301 stats->iw.tcpRtoMin = (u64) m.tcpRtoMin;
1302 stats->iw.tcpRtoMax = (u64) m.tcpRtoMax;
1303 return 0;
1304}
1305
1130static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1306static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
1131static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); 1307static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
1132static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1308static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
@@ -1136,7 +1312,7 @@ static struct device_attribute *iwch_class_attributes[] = {
1136 &dev_attr_hw_rev, 1312 &dev_attr_hw_rev,
1137 &dev_attr_fw_ver, 1313 &dev_attr_fw_ver,
1138 &dev_attr_hca_type, 1314 &dev_attr_hca_type,
1139 &dev_attr_board_id 1315 &dev_attr_board_id,
1140}; 1316};
1141 1317
1142int iwch_register_device(struct iwch_dev *dev) 1318int iwch_register_device(struct iwch_dev *dev)
@@ -1149,8 +1325,12 @@ int iwch_register_device(struct iwch_dev *dev)
1149 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); 1325 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
1150 memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); 1326 memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
1151 dev->ibdev.owner = THIS_MODULE; 1327 dev->ibdev.owner = THIS_MODULE;
1152 dev->device_cap_flags = 1328 dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
1153 (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW); 1329
1330 /* cxgb3 supports STag 0. */
1331 dev->ibdev.local_dma_lkey = 0;
1332 if (fw_supports_fastreg(dev))
1333 dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
1154 1334
1155 dev->ibdev.uverbs_cmd_mask = 1335 dev->ibdev.uverbs_cmd_mask =
1156 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 1336 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -1202,15 +1382,16 @@ int iwch_register_device(struct iwch_dev *dev)
1202 dev->ibdev.alloc_mw = iwch_alloc_mw; 1382 dev->ibdev.alloc_mw = iwch_alloc_mw;
1203 dev->ibdev.bind_mw = iwch_bind_mw; 1383 dev->ibdev.bind_mw = iwch_bind_mw;
1204 dev->ibdev.dealloc_mw = iwch_dealloc_mw; 1384 dev->ibdev.dealloc_mw = iwch_dealloc_mw;
1205 1385 dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr;
1386 dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
1387 dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
1206 dev->ibdev.attach_mcast = iwch_multicast_attach; 1388 dev->ibdev.attach_mcast = iwch_multicast_attach;
1207 dev->ibdev.detach_mcast = iwch_multicast_detach; 1389 dev->ibdev.detach_mcast = iwch_multicast_detach;
1208 dev->ibdev.process_mad = iwch_process_mad; 1390 dev->ibdev.process_mad = iwch_process_mad;
1209
1210 dev->ibdev.req_notify_cq = iwch_arm_cq; 1391 dev->ibdev.req_notify_cq = iwch_arm_cq;
1211 dev->ibdev.post_send = iwch_post_send; 1392 dev->ibdev.post_send = iwch_post_send;
1212 dev->ibdev.post_recv = iwch_post_receive; 1393 dev->ibdev.post_recv = iwch_post_receive;
1213 1394 dev->ibdev.get_protocol_stats = iwch_get_mib;
1214 1395
1215 dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); 1396 dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
1216 if (!dev->ibdev.iwcm) 1397 if (!dev->ibdev.iwcm)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 836163fc5429..f5ceca05c435 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -296,14 +296,6 @@ static inline u32 iwch_ib_to_tpt_access(int acc)
296 TPT_LOCAL_READ; 296 TPT_LOCAL_READ;
297} 297}
298 298
299static inline u32 iwch_ib_to_mwbind_access(int acc)
300{
301 return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) |
302 (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) |
303 (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) |
304 T3_MEM_ACCESS_LOCAL_READ;
305}
306
307enum iwch_mmid_state { 299enum iwch_mmid_state {
308 IWCH_STAG_STATE_VALID, 300 IWCH_STAG_STATE_VALID,
309 IWCH_STAG_STATE_INVALID 301 IWCH_STAG_STATE_INVALID
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 992613799228..9a3be3a9d5dc 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -33,10 +33,11 @@
33#include "iwch.h" 33#include "iwch.h"
34#include "iwch_cm.h" 34#include "iwch_cm.h"
35#include "cxio_hal.h" 35#include "cxio_hal.h"
36#include "cxio_resource.h"
36 37
37#define NO_SUPPORT -1 38#define NO_SUPPORT -1
38 39
39static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, 40static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
40 u8 * flit_cnt) 41 u8 * flit_cnt)
41{ 42{
42 int i; 43 int i;
@@ -44,59 +45,44 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
44 45
45 switch (wr->opcode) { 46 switch (wr->opcode) {
46 case IB_WR_SEND: 47 case IB_WR_SEND:
47 case IB_WR_SEND_WITH_IMM:
48 if (wr->send_flags & IB_SEND_SOLICITED) 48 if (wr->send_flags & IB_SEND_SOLICITED)
49 wqe->send.rdmaop = T3_SEND_WITH_SE; 49 wqe->send.rdmaop = T3_SEND_WITH_SE;
50 else 50 else
51 wqe->send.rdmaop = T3_SEND; 51 wqe->send.rdmaop = T3_SEND;
52 wqe->send.rem_stag = 0; 52 wqe->send.rem_stag = 0;
53 break; 53 break;
54#if 0 /* Not currently supported */ 54 case IB_WR_SEND_WITH_INV:
55 case TYPE_SEND_INVALIDATE: 55 if (wr->send_flags & IB_SEND_SOLICITED)
56 case TYPE_SEND_INVALIDATE_IMMEDIATE: 56 wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
57 wqe->send.rdmaop = T3_SEND_WITH_INV; 57 else
58 wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); 58 wqe->send.rdmaop = T3_SEND_WITH_INV;
59 break; 59 wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey);
60 case TYPE_SEND_SE_INVALIDATE:
61 wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
62 wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
63 break; 60 break;
64#endif
65 default: 61 default:
66 break; 62 return -EINVAL;
67 } 63 }
68 if (wr->num_sge > T3_MAX_SGE) 64 if (wr->num_sge > T3_MAX_SGE)
69 return -EINVAL; 65 return -EINVAL;
70 wqe->send.reserved[0] = 0; 66 wqe->send.reserved[0] = 0;
71 wqe->send.reserved[1] = 0; 67 wqe->send.reserved[1] = 0;
72 wqe->send.reserved[2] = 0; 68 wqe->send.reserved[2] = 0;
73 if (wr->opcode == IB_WR_SEND_WITH_IMM) { 69 plen = 0;
74 plen = 4; 70 for (i = 0; i < wr->num_sge; i++) {
75 wqe->send.sgl[0].stag = wr->ex.imm_data; 71 if ((plen + wr->sg_list[i].length) < plen)
76 wqe->send.sgl[0].len = __constant_cpu_to_be32(0); 72 return -EMSGSIZE;
77 wqe->send.num_sgle = __constant_cpu_to_be32(0); 73
78 *flit_cnt = 5; 74 plen += wr->sg_list[i].length;
79 } else { 75 wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
80 plen = 0; 76 wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
81 for (i = 0; i < wr->num_sge; i++) { 77 wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
82 if ((plen + wr->sg_list[i].length) < plen) {
83 return -EMSGSIZE;
84 }
85 plen += wr->sg_list[i].length;
86 wqe->send.sgl[i].stag =
87 cpu_to_be32(wr->sg_list[i].lkey);
88 wqe->send.sgl[i].len =
89 cpu_to_be32(wr->sg_list[i].length);
90 wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
91 }
92 wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
93 *flit_cnt = 4 + ((wr->num_sge) << 1);
94 } 78 }
79 wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
80 *flit_cnt = 4 + ((wr->num_sge) << 1);
95 wqe->send.plen = cpu_to_be32(plen); 81 wqe->send.plen = cpu_to_be32(plen);
96 return 0; 82 return 0;
97} 83}
98 84
99static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, 85static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
100 u8 *flit_cnt) 86 u8 *flit_cnt)
101{ 87{
102 int i; 88 int i;
@@ -137,15 +123,18 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
137 return 0; 123 return 0;
138} 124}
139 125
140static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, 126static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
141 u8 *flit_cnt) 127 u8 *flit_cnt)
142{ 128{
143 if (wr->num_sge > 1) 129 if (wr->num_sge > 1)
144 return -EINVAL; 130 return -EINVAL;
145 wqe->read.rdmaop = T3_READ_REQ; 131 wqe->read.rdmaop = T3_READ_REQ;
132 if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
133 wqe->read.local_inv = 1;
134 else
135 wqe->read.local_inv = 0;
146 wqe->read.reserved[0] = 0; 136 wqe->read.reserved[0] = 0;
147 wqe->read.reserved[1] = 0; 137 wqe->read.reserved[1] = 0;
148 wqe->read.reserved[2] = 0;
149 wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); 138 wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
150 wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); 139 wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
151 wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); 140 wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
@@ -155,6 +144,57 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
155 return 0; 144 return 0;
156} 145}
157 146
147static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
148 u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
149{
150 int i;
151 __be64 *p;
152
153 if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH)
154 return -EINVAL;
155 *wr_cnt = 1;
156 wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
157 wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length);
158 wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
159 wqe->fastreg.va_base_lo_fbo =
160 cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff);
161 wqe->fastreg.page_type_perms = cpu_to_be32(
162 V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) |
163 V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) |
164 V_FR_TYPE(TPT_VATO) |
165 V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags)));
166 p = &wqe->fastreg.pbl_addrs[0];
167 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) {
168
169 /* If we need a 2nd WR, then set it up */
170 if (i == T3_MAX_FASTREG_FRAG) {
171 *wr_cnt = 2;
172 wqe = (union t3_wr *)(wq->queue +
173 Q_PTR2IDX((wq->wptr+1), wq->size_log2));
174 build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
175 Q_GENBIT(wq->wptr + 1, wq->size_log2),
176 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG,
177 T3_EOP);
178
179 p = &wqe->pbl_frag.pbl_addrs[0];
180 }
181 *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
182 }
183 *flit_cnt = 5 + wr->wr.fast_reg.page_list_len;
184 if (*flit_cnt > 15)
185 *flit_cnt = 15;
186 return 0;
187}
188
189static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr,
190 u8 *flit_cnt)
191{
192 wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
193 wqe->local_inv.reserved = 0;
194 *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3;
195 return 0;
196}
197
158/* 198/*
159 * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now. 199 * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
160 */ 200 */
@@ -205,23 +245,106 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
205 return 0; 245 return 0;
206} 246}
207 247
208static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, 248static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
209 struct ib_recv_wr *wr) 249 struct ib_recv_wr *wr)
210{ 250{
211 int i; 251 int i, err = 0;
212 if (wr->num_sge > T3_MAX_SGE) 252 u32 pbl_addr[T3_MAX_SGE];
213 return -EINVAL; 253 u8 page_size[T3_MAX_SGE];
254
255 err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
256 page_size);
257 if (err)
258 return err;
259 wqe->recv.pagesz[0] = page_size[0];
260 wqe->recv.pagesz[1] = page_size[1];
261 wqe->recv.pagesz[2] = page_size[2];
262 wqe->recv.pagesz[3] = page_size[3];
214 wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); 263 wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
215 for (i = 0; i < wr->num_sge; i++) { 264 for (i = 0; i < wr->num_sge; i++) {
216 wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); 265 wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
217 wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); 266 wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
267
268 /* to in the WQE == the offset into the page */
269 wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
270 (1UL << (12 + page_size[i])));
271
272 /* pbl_addr is the adapters address in the PBL */
273 wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
274 }
275 for (; i < T3_MAX_SGE; i++) {
276 wqe->recv.sgl[i].stag = 0;
277 wqe->recv.sgl[i].len = 0;
278 wqe->recv.sgl[i].to = 0;
279 wqe->recv.pbl_addr[i] = 0;
280 }
281 qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
282 qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
283 qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
284 qhp->wq.rq_size_log2)].pbl_addr = 0;
285 return 0;
286}
287
288static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
289 struct ib_recv_wr *wr)
290{
291 int i;
292 u32 pbl_addr;
293 u32 pbl_offset;
294
295
296 /*
297 * The T3 HW requires the PBL in the HW recv descriptor to reference
298 * a PBL entry. So we allocate the max needed PBL memory here and pass
299 * it to the uP in the recv WR. The uP will build the PBL and setup
300 * the HW recv descriptor.
301 */
302 pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
303 if (!pbl_addr)
304 return -ENOMEM;
305
306 /*
307 * Compute the 8B aligned offset.
308 */
309 pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
310
311 wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
312
313 for (i = 0; i < wr->num_sge; i++) {
314
315 /*
316 * Use a 128MB page size. This and an imposed 128MB
317 * sge length limit allows us to require only a 2-entry HW
318 * PBL for each SGE. This restriction is acceptable since
319 * since it is not possible to allocate 128MB of contiguous
320 * DMA coherent memory!
321 */
322 if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
323 return -EINVAL;
324 wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
325
326 /*
327 * T3 restricts a recv to all zero-stag or all non-zero-stag.
328 */
329 if (wr->sg_list[i].lkey != 0)
330 return -EINVAL;
331 wqe->recv.sgl[i].stag = 0;
332 wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
218 wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); 333 wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
334 wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset);
335 pbl_offset += 2;
219 } 336 }
220 for (; i < T3_MAX_SGE; i++) { 337 for (; i < T3_MAX_SGE; i++) {
338 wqe->recv.pagesz[i] = 0;
221 wqe->recv.sgl[i].stag = 0; 339 wqe->recv.sgl[i].stag = 0;
222 wqe->recv.sgl[i].len = 0; 340 wqe->recv.sgl[i].len = 0;
223 wqe->recv.sgl[i].to = 0; 341 wqe->recv.sgl[i].to = 0;
342 wqe->recv.pbl_addr[i] = 0;
224 } 343 }
344 qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
345 qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
346 qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
347 qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
225 return 0; 348 return 0;
226} 349}
227 350
@@ -238,6 +361,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
238 u32 num_wrs; 361 u32 num_wrs;
239 unsigned long flag; 362 unsigned long flag;
240 struct t3_swsq *sqp; 363 struct t3_swsq *sqp;
364 int wr_cnt = 1;
241 365
242 qhp = to_iwch_qp(ibqp); 366 qhp = to_iwch_qp(ibqp);
243 spin_lock_irqsave(&qhp->lock, flag); 367 spin_lock_irqsave(&qhp->lock, flag);
@@ -262,33 +386,45 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
262 t3_wr_flags = 0; 386 t3_wr_flags = 0;
263 if (wr->send_flags & IB_SEND_SOLICITED) 387 if (wr->send_flags & IB_SEND_SOLICITED)
264 t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; 388 t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
265 if (wr->send_flags & IB_SEND_FENCE)
266 t3_wr_flags |= T3_READ_FENCE_FLAG;
267 if (wr->send_flags & IB_SEND_SIGNALED) 389 if (wr->send_flags & IB_SEND_SIGNALED)
268 t3_wr_flags |= T3_COMPLETION_FLAG; 390 t3_wr_flags |= T3_COMPLETION_FLAG;
269 sqp = qhp->wq.sq + 391 sqp = qhp->wq.sq +
270 Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); 392 Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
271 switch (wr->opcode) { 393 switch (wr->opcode) {
272 case IB_WR_SEND: 394 case IB_WR_SEND:
273 case IB_WR_SEND_WITH_IMM: 395 case IB_WR_SEND_WITH_INV:
396 if (wr->send_flags & IB_SEND_FENCE)
397 t3_wr_flags |= T3_READ_FENCE_FLAG;
274 t3_wr_opcode = T3_WR_SEND; 398 t3_wr_opcode = T3_WR_SEND;
275 err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt); 399 err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
276 break; 400 break;
277 case IB_WR_RDMA_WRITE: 401 case IB_WR_RDMA_WRITE:
278 case IB_WR_RDMA_WRITE_WITH_IMM: 402 case IB_WR_RDMA_WRITE_WITH_IMM:
279 t3_wr_opcode = T3_WR_WRITE; 403 t3_wr_opcode = T3_WR_WRITE;
280 err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt); 404 err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
281 break; 405 break;
282 case IB_WR_RDMA_READ: 406 case IB_WR_RDMA_READ:
407 case IB_WR_RDMA_READ_WITH_INV:
283 t3_wr_opcode = T3_WR_READ; 408 t3_wr_opcode = T3_WR_READ;
284 t3_wr_flags = 0; /* T3 reads are always signaled */ 409 t3_wr_flags = 0; /* T3 reads are always signaled */
285 err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt); 410 err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
286 if (err) 411 if (err)
287 break; 412 break;
288 sqp->read_len = wqe->read.local_len; 413 sqp->read_len = wqe->read.local_len;
289 if (!qhp->wq.oldest_read) 414 if (!qhp->wq.oldest_read)
290 qhp->wq.oldest_read = sqp; 415 qhp->wq.oldest_read = sqp;
291 break; 416 break;
417 case IB_WR_FAST_REG_MR:
418 t3_wr_opcode = T3_WR_FASTREG;
419 err = build_fastreg(wqe, wr, &t3_wr_flit_cnt,
420 &wr_cnt, &qhp->wq);
421 break;
422 case IB_WR_LOCAL_INV:
423 if (wr->send_flags & IB_SEND_FENCE)
424 t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
425 t3_wr_opcode = T3_WR_INV_STAG;
426 err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
427 break;
292 default: 428 default:
293 PDBG("%s post of type=%d TBD!\n", __func__, 429 PDBG("%s post of type=%d TBD!\n", __func__,
294 wr->opcode); 430 wr->opcode);
@@ -307,14 +443,15 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
307 443
308 build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, 444 build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
309 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 445 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
310 0, t3_wr_flit_cnt); 446 0, t3_wr_flit_cnt,
447 (wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
311 PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", 448 PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
312 __func__, (unsigned long long) wr->wr_id, idx, 449 __func__, (unsigned long long) wr->wr_id, idx,
313 Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), 450 Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
314 sqp->opcode); 451 sqp->opcode);
315 wr = wr->next; 452 wr = wr->next;
316 num_wrs--; 453 num_wrs--;
317 ++(qhp->wq.wptr); 454 qhp->wq.wptr += wr_cnt;
318 ++(qhp->wq.sq_wptr); 455 ++(qhp->wq.sq_wptr);
319 } 456 }
320 spin_unlock_irqrestore(&qhp->lock, flag); 457 spin_unlock_irqrestore(&qhp->lock, flag);
@@ -345,21 +482,27 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
345 return -EINVAL; 482 return -EINVAL;
346 } 483 }
347 while (wr) { 484 while (wr) {
485 if (wr->num_sge > T3_MAX_SGE) {
486 err = -EINVAL;
487 *bad_wr = wr;
488 break;
489 }
348 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); 490 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
349 wqe = (union t3_wr *) (qhp->wq.queue + idx); 491 wqe = (union t3_wr *) (qhp->wq.queue + idx);
350 if (num_wrs) 492 if (num_wrs)
351 err = iwch_build_rdma_recv(qhp->rhp, wqe, wr); 493 if (wr->sg_list[0].lkey)
494 err = build_rdma_recv(qhp, wqe, wr);
495 else
496 err = build_zero_stag_recv(qhp, wqe, wr);
352 else 497 else
353 err = -ENOMEM; 498 err = -ENOMEM;
354 if (err) { 499 if (err) {
355 *bad_wr = wr; 500 *bad_wr = wr;
356 break; 501 break;
357 } 502 }
358 qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
359 wr->wr_id;
360 build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, 503 build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
361 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 504 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
362 0, sizeof(struct t3_receive_wr) >> 3); 505 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
363 PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x " 506 PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
364 "wqe %p \n", __func__, (unsigned long long) wr->wr_id, 507 "wqe %p \n", __func__, (unsigned long long) wr->wr_id,
365 idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); 508 idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
@@ -419,10 +562,10 @@ int iwch_bind_mw(struct ib_qp *qp,
419 sgl.lkey = mw_bind->mr->lkey; 562 sgl.lkey = mw_bind->mr->lkey;
420 sgl.length = mw_bind->length; 563 sgl.length = mw_bind->length;
421 wqe->bind.reserved = 0; 564 wqe->bind.reserved = 0;
422 wqe->bind.type = T3_VA_BASED_TO; 565 wqe->bind.type = TPT_VATO;
423 566
424 /* TBD: check perms */ 567 /* TBD: check perms */
425 wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags); 568 wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags);
426 wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); 569 wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
427 wqe->bind.mw_stag = cpu_to_be32(mw->rkey); 570 wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
428 wqe->bind.mw_len = cpu_to_be32(mw_bind->length); 571 wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
@@ -430,7 +573,7 @@ int iwch_bind_mw(struct ib_qp *qp,
430 err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); 573 err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
431 if (err) { 574 if (err) {
432 spin_unlock_irqrestore(&qhp->lock, flag); 575 spin_unlock_irqrestore(&qhp->lock, flag);
433 return err; 576 return err;
434 } 577 }
435 wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; 578 wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
436 sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); 579 sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
@@ -441,10 +584,9 @@ int iwch_bind_mw(struct ib_qp *qp,
441 sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED); 584 sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
442 wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr); 585 wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
443 wqe->bind.mr_pagesz = page_size; 586 wqe->bind.mr_pagesz = page_size;
444 wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
445 build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags, 587 build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
446 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, 588 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
447 sizeof(struct t3_bind_mw_wr) >> 3); 589 sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
448 ++(qhp->wq.wptr); 590 ++(qhp->wq.wptr);
449 ++(qhp->wq.sq_wptr); 591 ++(qhp->wq.sq_wptr);
450 spin_unlock_irqrestore(&qhp->lock, flag); 592 spin_unlock_irqrestore(&qhp->lock, flag);
@@ -758,7 +900,8 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
758 init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); 900 init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
759 init_attr.rqe_count = iwch_rqes_posted(qhp); 901 init_attr.rqe_count = iwch_rqes_posted(qhp);
760 init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; 902 init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
761 init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0; 903 if (!qhp->ibqp.uobject)
904 init_attr.flags |= PRIV_QP;
762 if (peer2peer) { 905 if (peer2peer) {
763 init_attr.rtr_type = RTR_READ; 906 init_attr.rtr_type = RTR_READ;
764 if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) 907 if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index ce1ab0571be3..0792d930c481 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -531,7 +531,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
531{ 531{
532 struct ehca_eq *eq = &shca->eq; 532 struct ehca_eq *eq = &shca->eq;
533 struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; 533 struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
534 u64 eqe_value; 534 u64 eqe_value, ret;
535 unsigned long flags; 535 unsigned long flags;
536 int eqe_cnt, i; 536 int eqe_cnt, i;
537 int eq_empty = 0; 537 int eq_empty = 0;
@@ -583,8 +583,13 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
583 ehca_dbg(&shca->ib_device, 583 ehca_dbg(&shca->ib_device,
584 "No eqe found for irq event"); 584 "No eqe found for irq event");
585 goto unlock_irq_spinlock; 585 goto unlock_irq_spinlock;
586 } else if (!is_irq) 586 } else if (!is_irq) {
587 ret = hipz_h_eoi(eq->ist);
588 if (ret != H_SUCCESS)
589 ehca_err(&shca->ib_device,
590 "bad return code EOI -rc = %ld\n", ret);
587 ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); 591 ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
592 }
588 if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) 593 if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
589 ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); 594 ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
590 /* enable irq for new packets */ 595 /* enable irq for new packets */
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 482103eb6eac..598844d2edc9 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -923,6 +923,7 @@ static struct of_device_id ehca_device_table[] =
923 }, 923 },
924 {}, 924 {},
925}; 925};
926MODULE_DEVICE_TABLE(of, ehca_device_table);
926 927
927static struct of_platform_driver ehca_driver = { 928static struct of_platform_driver ehca_driver = {
928 .name = "ehca", 929 .name = "ehca",
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index f093b0033daf..dd9bc68f1c7b 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -544,8 +544,16 @@ int ehca_post_recv(struct ib_qp *qp,
544 struct ib_recv_wr *recv_wr, 544 struct ib_recv_wr *recv_wr,
545 struct ib_recv_wr **bad_recv_wr) 545 struct ib_recv_wr **bad_recv_wr)
546{ 546{
547 return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp), 547 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
548 qp->device, recv_wr, bad_recv_wr); 548
549 /* Reject WR if QP is in RESET state */
550 if (unlikely(my_qp->state == IB_QPS_RESET)) {
551 ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
552 my_qp->state, qp->qp_num);
553 return -EINVAL;
554 }
555
556 return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
549} 557}
550 558
551int ehca_post_srq_recv(struct ib_srq *srq, 559int ehca_post_srq_recv(struct ib_srq *srq,
@@ -681,7 +689,7 @@ poll_cq_one_read_cqe:
681 wc->dlid_path_bits = cqe->dlid; 689 wc->dlid_path_bits = cqe->dlid;
682 wc->src_qp = cqe->remote_qp_number; 690 wc->src_qp = cqe->remote_qp_number;
683 wc->wc_flags = cqe->w_completion_flags; 691 wc->wc_flags = cqe->w_completion_flags;
684 wc->imm_data = cpu_to_be32(cqe->immediate_data); 692 wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
685 wc->sl = cqe->service_level; 693 wc->sl = cqe->service_level;
686 694
687poll_cq_one_exit0: 695poll_cq_one_exit0:
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 5245e13c3a30..415d3a465de6 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -933,3 +933,13 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
933 r_cb, 933 r_cb,
934 0, 0, 0, 0); 934 0, 0, 0, 0);
935} 935}
936
937u64 hipz_h_eoi(int irq)
938{
939 unsigned long xirr;
940
941 iosync();
942 xirr = (0xffULL << 24) | irq;
943
944 return plpar_hcall_norets(H_EOI, xirr);
945}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 60ce02b70663..2c3c6e0ea5c2 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -260,5 +260,6 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
260 const u64 ressource_handle, 260 const u64 ressource_handle,
261 void *rblock, 261 void *rblock,
262 unsigned long *byte_count); 262 unsigned long *byte_count);
263u64 hipz_h_eoi(int irq);
263 264
264#endif /* __HCP_IF_H__ */ 265#endif /* __HCP_IF_H__ */
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index a03bd28d9b48..d385e4168c97 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -82,7 +82,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
82 wc->uqueue[head].opcode = entry->opcode; 82 wc->uqueue[head].opcode = entry->opcode;
83 wc->uqueue[head].vendor_err = entry->vendor_err; 83 wc->uqueue[head].vendor_err = entry->vendor_err;
84 wc->uqueue[head].byte_len = entry->byte_len; 84 wc->uqueue[head].byte_len = entry->byte_len;
85 wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data; 85 wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
86 wc->uqueue[head].qp_num = entry->qp->qp_num; 86 wc->uqueue[head].qp_num = entry->qp->qp_num;
87 wc->uqueue[head].src_qp = entry->src_qp; 87 wc->uqueue[head].src_qp = entry->src_qp;
88 wc->uqueue[head].wc_flags = entry->wc_flags; 88 wc->uqueue[head].wc_flags = entry->wc_flags;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
index 8eee7830f042..fb70712ac85c 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -2228,8 +2228,8 @@ static void ipath_autoneg_send(struct ipath_devdata *dd, int which)
2228 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 2228 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
2229 0x40000001, 0x1388, 0x15e, /* rest 0's */ 2229 0x40000001, 0x1388, 0x15e, /* rest 0's */
2230 }; 2230 };
2231 dcnt = sizeof(madpayload_start)/sizeof(madpayload_start[0]); 2231 dcnt = ARRAY_SIZE(madpayload_start);
2232 hcnt = sizeof(hdr)/sizeof(hdr[0]); 2232 hcnt = ARRAY_SIZE(hdr);
2233 if (!swapped) { 2233 if (!swapped) {
2234 /* for maintainability, do it at runtime */ 2234 /* for maintainability, do it at runtime */
2235 for (i = 0; i < hcnt; i++) { 2235 for (i = 0; i < hcnt; i++) {
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 5f9315d77a43..be4fc9ada8e7 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -111,9 +111,9 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
111 nip->revision = cpu_to_be32((majrev << 16) | minrev); 111 nip->revision = cpu_to_be32((majrev << 16) | minrev);
112 nip->local_port_num = port; 112 nip->local_port_num = port;
113 vendor = dd->ipath_vendorid; 113 vendor = dd->ipath_vendorid;
114 nip->vendor_id[0] = 0; 114 nip->vendor_id[0] = IPATH_SRC_OUI_1;
115 nip->vendor_id[1] = vendor >> 8; 115 nip->vendor_id[1] = IPATH_SRC_OUI_2;
116 nip->vendor_id[2] = vendor; 116 nip->vendor_id[2] = IPATH_SRC_OUI_3;
117 117
118 return reply(smp); 118 return reply(smp);
119} 119}
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 108df667d2ee..97710522624d 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1703,11 +1703,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1703 case OP(SEND_LAST_WITH_IMMEDIATE): 1703 case OP(SEND_LAST_WITH_IMMEDIATE):
1704 send_last_imm: 1704 send_last_imm:
1705 if (header_in_data) { 1705 if (header_in_data) {
1706 wc.imm_data = *(__be32 *) data; 1706 wc.ex.imm_data = *(__be32 *) data;
1707 data += sizeof(__be32); 1707 data += sizeof(__be32);
1708 } else { 1708 } else {
1709 /* Immediate data comes after BTH */ 1709 /* Immediate data comes after BTH */
1710 wc.imm_data = ohdr->u.imm_data; 1710 wc.ex.imm_data = ohdr->u.imm_data;
1711 } 1711 }
1712 hdrsize += 4; 1712 hdrsize += 4;
1713 wc.wc_flags = IB_WC_WITH_IMM; 1713 wc.wc_flags = IB_WC_WITH_IMM;
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index a4b5521567fe..af051f757663 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -331,7 +331,7 @@ again:
331 switch (wqe->wr.opcode) { 331 switch (wqe->wr.opcode) {
332 case IB_WR_SEND_WITH_IMM: 332 case IB_WR_SEND_WITH_IMM:
333 wc.wc_flags = IB_WC_WITH_IMM; 333 wc.wc_flags = IB_WC_WITH_IMM;
334 wc.imm_data = wqe->wr.ex.imm_data; 334 wc.ex.imm_data = wqe->wr.ex.imm_data;
335 /* FALLTHROUGH */ 335 /* FALLTHROUGH */
336 case IB_WR_SEND: 336 case IB_WR_SEND:
337 if (!ipath_get_rwqe(qp, 0)) 337 if (!ipath_get_rwqe(qp, 0))
@@ -342,7 +342,7 @@ again:
342 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 342 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
343 goto inv_err; 343 goto inv_err;
344 wc.wc_flags = IB_WC_WITH_IMM; 344 wc.wc_flags = IB_WC_WITH_IMM;
345 wc.imm_data = wqe->wr.ex.imm_data; 345 wc.ex.imm_data = wqe->wr.ex.imm_data;
346 if (!ipath_get_rwqe(qp, 1)) 346 if (!ipath_get_rwqe(qp, 1))
347 goto rnr_nak; 347 goto rnr_nak;
348 /* FALLTHROUGH */ 348 /* FALLTHROUGH */
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 0596ec16fcbd..82cc588b8bf2 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -379,11 +379,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
379 case OP(SEND_LAST_WITH_IMMEDIATE): 379 case OP(SEND_LAST_WITH_IMMEDIATE):
380 send_last_imm: 380 send_last_imm:
381 if (header_in_data) { 381 if (header_in_data) {
382 wc.imm_data = *(__be32 *) data; 382 wc.ex.imm_data = *(__be32 *) data;
383 data += sizeof(__be32); 383 data += sizeof(__be32);
384 } else { 384 } else {
385 /* Immediate data comes after BTH */ 385 /* Immediate data comes after BTH */
386 wc.imm_data = ohdr->u.imm_data; 386 wc.ex.imm_data = ohdr->u.imm_data;
387 } 387 }
388 hdrsize += 4; 388 hdrsize += 4;
389 wc.wc_flags = IB_WC_WITH_IMM; 389 wc.wc_flags = IB_WC_WITH_IMM;
@@ -483,11 +483,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
483 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): 483 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
484 rdma_last_imm: 484 rdma_last_imm:
485 if (header_in_data) { 485 if (header_in_data) {
486 wc.imm_data = *(__be32 *) data; 486 wc.ex.imm_data = *(__be32 *) data;
487 data += sizeof(__be32); 487 data += sizeof(__be32);
488 } else { 488 } else {
489 /* Immediate data comes after BTH */ 489 /* Immediate data comes after BTH */
490 wc.imm_data = ohdr->u.imm_data; 490 wc.ex.imm_data = ohdr->u.imm_data;
491 } 491 }
492 hdrsize += 4; 492 hdrsize += 4;
493 wc.wc_flags = IB_WC_WITH_IMM; 493 wc.wc_flags = IB_WC_WITH_IMM;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 77ca8ca74e78..36aa242c487c 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -96,7 +96,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
96 96
97 if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 97 if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
98 wc.wc_flags = IB_WC_WITH_IMM; 98 wc.wc_flags = IB_WC_WITH_IMM;
99 wc.imm_data = swqe->wr.ex.imm_data; 99 wc.ex.imm_data = swqe->wr.ex.imm_data;
100 } 100 }
101 101
102 /* 102 /*
@@ -492,14 +492,14 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
492 if (qp->ibqp.qp_num > 1 && 492 if (qp->ibqp.qp_num > 1 &&
493 opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { 493 opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
494 if (header_in_data) { 494 if (header_in_data) {
495 wc.imm_data = *(__be32 *) data; 495 wc.ex.imm_data = *(__be32 *) data;
496 data += sizeof(__be32); 496 data += sizeof(__be32);
497 } else 497 } else
498 wc.imm_data = ohdr->u.ud.imm_data; 498 wc.ex.imm_data = ohdr->u.ud.imm_data;
499 wc.wc_flags = IB_WC_WITH_IMM; 499 wc.wc_flags = IB_WC_WITH_IMM;
500 hdrsize += sizeof(u32); 500 hdrsize += sizeof(u32);
501 } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { 501 } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
502 wc.imm_data = 0; 502 wc.ex.imm_data = 0;
503 wc.wc_flags = 0; 503 wc.wc_flags = 0;
504 } else { 504 } else {
505 dev->n_pkt_drops++; 505 dev->n_pkt_drops++;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 7779165b2c2c..9e23ab0b51a1 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1497,7 +1497,8 @@ static int ipath_query_device(struct ib_device *ibdev,
1497 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | 1497 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1498 IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; 1498 IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1499 props->page_size_cap = PAGE_SIZE; 1499 props->page_size_cap = PAGE_SIZE;
1500 props->vendor_id = dev->dd->ipath_vendorid; 1500 props->vendor_id =
1501 IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
1501 props->vendor_part_id = dev->dd->ipath_deviceid; 1502 props->vendor_part_id = dev->dd->ipath_deviceid;
1502 props->hw_ver = dev->dd->ipath_pcirev; 1503 props->hw_ver = dev->dd->ipath_pcirev;
1503 1504
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 4521319b1406..299f20832ab6 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -663,18 +663,18 @@ repoll:
663 663
664 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { 664 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
665 case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: 665 case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
666 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 666 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
667 wc->wc_flags = IB_WC_WITH_IMM; 667 wc->wc_flags = IB_WC_WITH_IMM;
668 wc->imm_data = cqe->immed_rss_invalid; 668 wc->ex.imm_data = cqe->immed_rss_invalid;
669 break; 669 break;
670 case MLX4_RECV_OPCODE_SEND: 670 case MLX4_RECV_OPCODE_SEND:
671 wc->opcode = IB_WC_RECV; 671 wc->opcode = IB_WC_RECV;
672 wc->wc_flags = 0; 672 wc->wc_flags = 0;
673 break; 673 break;
674 case MLX4_RECV_OPCODE_SEND_IMM: 674 case MLX4_RECV_OPCODE_SEND_IMM:
675 wc->opcode = IB_WC_RECV; 675 wc->opcode = IB_WC_RECV;
676 wc->wc_flags = IB_WC_WITH_IMM; 676 wc->wc_flags = IB_WC_WITH_IMM;
677 wc->imm_data = cqe->immed_rss_invalid; 677 wc->ex.imm_data = cqe->immed_rss_invalid;
678 break; 678 break;
679 } 679 }
680 680
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 4c1e72fc8f57..cdca3a511e1c 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -255,7 +255,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
255 return IB_MAD_RESULT_SUCCESS; 255 return IB_MAD_RESULT_SUCCESS;
256 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || 256 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
257 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || 257 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
258 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) { 258 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 ||
259 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
259 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && 260 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
260 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) 261 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
261 return IB_MAD_RESULT_SUCCESS; 262 return IB_MAD_RESULT_SUCCESS;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4d61e32866c6..bcf50648fa18 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -90,7 +90,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
90 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | 90 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
91 IB_DEVICE_PORT_ACTIVE_EVENT | 91 IB_DEVICE_PORT_ACTIVE_EVENT |
92 IB_DEVICE_SYS_IMAGE_GUID | 92 IB_DEVICE_SYS_IMAGE_GUID |
93 IB_DEVICE_RC_RNR_NAK_GEN; 93 IB_DEVICE_RC_RNR_NAK_GEN |
94 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
94 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) 95 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
95 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; 96 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
96 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) 97 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -437,7 +438,9 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
437static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 438static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
438{ 439{
439 return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, 440 return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
440 &to_mqp(ibqp)->mqp, gid->raw); 441 &to_mqp(ibqp)->mqp, gid->raw,
442 !!(to_mqp(ibqp)->flags &
443 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
441} 444}
442 445
443static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 446static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 5cf994794d25..c4cf5b69eefa 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -101,7 +101,8 @@ struct mlx4_ib_wq {
101}; 101};
102 102
103enum mlx4_ib_qp_flags { 103enum mlx4_ib_qp_flags {
104 MLX4_IB_QP_LSO = 1 << 0 104 MLX4_IB_QP_LSO = 1 << 0,
105 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
105}; 106};
106 107
107struct mlx4_ib_qp { 108struct mlx4_ib_qp {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index a80df22deae8..89eb6cbe592e 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -129,9 +129,10 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
129 int ind; 129 int ind;
130 void *buf; 130 void *buf;
131 __be32 stamp; 131 __be32 stamp;
132 struct mlx4_wqe_ctrl_seg *ctrl;
132 133
133 s = roundup(size, 1U << qp->sq.wqe_shift);
134 if (qp->sq_max_wqes_per_wr > 1) { 134 if (qp->sq_max_wqes_per_wr > 1) {
135 s = roundup(size, 1U << qp->sq.wqe_shift);
135 for (i = 0; i < s; i += 64) { 136 for (i = 0; i < s; i += 64) {
136 ind = (i >> qp->sq.wqe_shift) + n; 137 ind = (i >> qp->sq.wqe_shift) + n;
137 stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : 138 stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
@@ -141,7 +142,8 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
141 *wqe = stamp; 142 *wqe = stamp;
142 } 143 }
143 } else { 144 } else {
144 buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); 145 ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
146 s = (ctrl->fence_size & 0x3f) << 4;
145 for (i = 64; i < s; i += 64) { 147 for (i = 64; i < s; i += 64) {
146 wqe = buf + i; 148 wqe = buf + i;
147 *wqe = cpu_to_be32(0xffffffff); 149 *wqe = cpu_to_be32(0xffffffff);
@@ -452,19 +454,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
452 spin_lock_init(&qp->rq.lock); 454 spin_lock_init(&qp->rq.lock);
453 455
454 qp->state = IB_QPS_RESET; 456 qp->state = IB_QPS_RESET;
455 qp->atomic_rd_en = 0;
456 qp->resp_depth = 0;
457
458 qp->rq.head = 0;
459 qp->rq.tail = 0;
460 qp->sq.head = 0;
461 qp->sq.tail = 0;
462 qp->sq_next_wqe = 0;
463
464 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 457 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
465 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); 458 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
466 else
467 qp->sq_signal_bits = 0;
468 459
469 err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); 460 err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
470 if (err) 461 if (err)
@@ -509,6 +500,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
509 } else { 500 } else {
510 qp->sq_no_prefetch = 0; 501 qp->sq_no_prefetch = 0;
511 502
503 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
504 qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
505
512 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) 506 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
513 qp->flags |= MLX4_IB_QP_LSO; 507 qp->flags |= MLX4_IB_QP_LSO;
514 508
@@ -682,10 +676,15 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
682 struct mlx4_ib_qp *qp; 676 struct mlx4_ib_qp *qp;
683 int err; 677 int err;
684 678
685 /* We only support LSO, and only for kernel UD QPs. */ 679 /*
686 if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO) 680 * We only support LSO and multicast loopback blocking, and
681 * only for kernel UD QPs.
682 */
683 if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
684 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
687 return ERR_PTR(-EINVAL); 685 return ERR_PTR(-EINVAL);
688 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO && 686
687 if (init_attr->create_flags &&
689 (pd->uobject || init_attr->qp_type != IB_QPT_UD)) 688 (pd->uobject || init_attr->qp_type != IB_QPT_UD))
690 return ERR_PTR(-EINVAL); 689 return ERR_PTR(-EINVAL);
691 690
@@ -694,7 +693,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
694 case IB_QPT_UC: 693 case IB_QPT_UC:
695 case IB_QPT_UD: 694 case IB_QPT_UD:
696 { 695 {
697 qp = kmalloc(sizeof *qp, GFP_KERNEL); 696 qp = kzalloc(sizeof *qp, GFP_KERNEL);
698 if (!qp) 697 if (!qp)
699 return ERR_PTR(-ENOMEM); 698 return ERR_PTR(-ENOMEM);
700 699
@@ -715,7 +714,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
715 if (pd->uobject) 714 if (pd->uobject)
716 return ERR_PTR(-EINVAL); 715 return ERR_PTR(-EINVAL);
717 716
718 sqp = kmalloc(sizeof *sqp, GFP_KERNEL); 717 sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
719 if (!sqp) 718 if (!sqp)
720 return ERR_PTR(-ENOMEM); 719 return ERR_PTR(-ENOMEM);
721 720
@@ -906,7 +905,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
906 attr->path_mtu); 905 attr->path_mtu);
907 goto out; 906 goto out;
908 } 907 }
909 context->mtu_msgmax = (attr->path_mtu << 5) | 31; 908 context->mtu_msgmax = (attr->path_mtu << 5) |
909 ilog2(dev->dev->caps.max_msg_sz);
910 } 910 }
911 911
912 if (qp->rq.wqe_cnt) 912 if (qp->rq.wqe_cnt)
@@ -1063,6 +1063,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1063 for (i = 0; i < qp->sq.wqe_cnt; ++i) { 1063 for (i = 0; i < qp->sq.wqe_cnt; ++i) {
1064 ctrl = get_send_wqe(qp, i); 1064 ctrl = get_send_wqe(qp, i);
1065 ctrl->owner_opcode = cpu_to_be32(1 << 31); 1065 ctrl->owner_opcode = cpu_to_be32(1 << 31);
1066 if (qp->sq_max_wqes_per_wr == 1)
1067 ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
1066 1068
1067 stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); 1069 stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
1068 } 1070 }
@@ -1127,23 +1129,6 @@ out:
1127 return err; 1129 return err;
1128} 1130}
1129 1131
1130static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
1131static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
1132 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
1133 IB_QP_PORT |
1134 IB_QP_QKEY),
1135 [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
1136 IB_QP_PORT |
1137 IB_QP_ACCESS_FLAGS),
1138 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
1139 IB_QP_PORT |
1140 IB_QP_ACCESS_FLAGS),
1141 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
1142 IB_QP_QKEY),
1143 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
1144 IB_QP_QKEY),
1145};
1146
1147int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1132int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1148 int attr_mask, struct ib_udata *udata) 1133 int attr_mask, struct ib_udata *udata)
1149{ 1134{
@@ -1186,15 +1171,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1186 goto out; 1171 goto out;
1187 } 1172 }
1188 1173
1189 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
1190 err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
1191 mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
1192 IB_QPS_RESET, IB_QPS_INIT);
1193 if (err)
1194 goto out;
1195 cur_state = IB_QPS_INIT;
1196 }
1197
1198 err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); 1174 err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
1199 1175
1200out: 1176out:
@@ -1865,6 +1841,13 @@ done:
1865 1841
1866 qp_init_attr->cap = qp_attr->cap; 1842 qp_init_attr->cap = qp_attr->cap;
1867 1843
1844 qp_init_attr->create_flags = 0;
1845 if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)
1846 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
1847
1848 if (qp->flags & MLX4_IB_QP_LSO)
1849 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
1850
1868out: 1851out:
1869 mutex_unlock(&qp->mutex); 1852 mutex_unlock(&qp->mutex);
1870 return err; 1853 return err;
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index a76306709618..c5ccc2daab60 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $
33 */ 31 */
34 32
35#include <linux/errno.h> 33#include <linux/errno.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 4b111a852ff6..32f6c6315454 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#include <linux/string.h> 34#include <linux/string.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index e948158a28d9..cc440f90000b 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id$
33 */ 31 */
34 32
35#include <linux/jiffies.h> 33#include <linux/jiffies.h>
@@ -128,7 +126,6 @@ static void handle_catas(struct mthca_dev *dev)
128static void poll_catas(unsigned long dev_ptr) 126static void poll_catas(unsigned long dev_ptr)
129{ 127{
130 struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; 128 struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
131 unsigned long flags;
132 int i; 129 int i;
133 130
134 for (i = 0; i < dev->catas_err.size; ++i) 131 for (i = 0; i < dev->catas_err.size; ++i)
@@ -137,13 +134,8 @@ static void poll_catas(unsigned long dev_ptr)
137 return; 134 return;
138 } 135 }
139 136
140 spin_lock_irqsave(&catas_lock, flags); 137 mod_timer(&dev->catas_err.timer,
141 if (!dev->catas_err.stop) 138 round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
142 mod_timer(&dev->catas_err.timer,
143 jiffies + MTHCA_CATAS_POLL_INTERVAL);
144 spin_unlock_irqrestore(&catas_lock, flags);
145
146 return;
147} 139}
148 140
149void mthca_start_catas_poll(struct mthca_dev *dev) 141void mthca_start_catas_poll(struct mthca_dev *dev)
@@ -151,7 +143,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
151 unsigned long addr; 143 unsigned long addr;
152 144
153 init_timer(&dev->catas_err.timer); 145 init_timer(&dev->catas_err.timer);
154 dev->catas_err.stop = 0;
155 dev->catas_err.map = NULL; 146 dev->catas_err.map = NULL;
156 147
157 addr = pci_resource_start(dev->pdev, 0) + 148 addr = pci_resource_start(dev->pdev, 0) +
@@ -182,10 +173,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
182 173
183void mthca_stop_catas_poll(struct mthca_dev *dev) 174void mthca_stop_catas_poll(struct mthca_dev *dev)
184{ 175{
185 spin_lock_irq(&catas_lock);
186 dev->catas_err.stop = 1;
187 spin_unlock_irq(&catas_lock);
188
189 del_timer_sync(&dev->catas_err.timer); 176 del_timer_sync(&dev->catas_err.timer);
190 177
191 if (dev->catas_err.map) { 178 if (dev->catas_err.map) {
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 54d230ee7d63..c33e1c53c799 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
35 */ 33 */
36 34
37#include <linux/completion.h> 35#include <linux/completion.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index 8928ca4a9325..6efd3265f248 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $
35 */ 33 */
36 34
37#ifndef MTHCA_CMD_H 35#ifndef MTHCA_CMD_H
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h
index afa56bfaab2e..75671f75cac4 100644
--- a/drivers/infiniband/hw/mthca/mthca_config_reg.h
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#ifndef MTHCA_CONFIG_REG_H 34#ifndef MTHCA_CONFIG_REG_H
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 20401d2ba6b2..d9f4735c2b37 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -32,8 +32,6 @@
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 *
36 * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $
37 */ 35 */
38 36
39#include <linux/hardirq.h> 37#include <linux/hardirq.h>
@@ -622,13 +620,13 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
622 case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE: 620 case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
623 case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE: 621 case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
624 entry->wc_flags = IB_WC_WITH_IMM; 622 entry->wc_flags = IB_WC_WITH_IMM;
625 entry->imm_data = cqe->imm_etype_pkey_eec; 623 entry->ex.imm_data = cqe->imm_etype_pkey_eec;
626 entry->opcode = IB_WC_RECV; 624 entry->opcode = IB_WC_RECV;
627 break; 625 break;
628 case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: 626 case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
629 case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: 627 case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
630 entry->wc_flags = IB_WC_WITH_IMM; 628 entry->wc_flags = IB_WC_WITH_IMM;
631 entry->imm_data = cqe->imm_etype_pkey_eec; 629 entry->ex.imm_data = cqe->imm_etype_pkey_eec;
632 entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; 630 entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
633 break; 631 break;
634 default: 632 default:
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 7bc32f8e377e..ee4d073c889f 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -32,8 +32,6 @@
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 *
36 * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $
37 */ 35 */
38 36
39#ifndef MTHCA_DEV_H 37#ifndef MTHCA_DEV_H
@@ -279,7 +277,6 @@ struct mthca_mcg_table {
279struct mthca_catas_err { 277struct mthca_catas_err {
280 u64 addr; 278 u64 addr;
281 u32 __iomem *map; 279 u32 __iomem *map;
282 unsigned long stop;
283 u32 size; 280 u32 size;
284 struct timer_list timer; 281 struct timer_list timer;
285 struct list_head list; 282 struct list_head list;
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
index b374dc395be1..14f51ef97d7e 100644
--- a/drivers/infiniband/hw/mthca/mthca_doorbell.h
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
35 */ 33 */
36 34
37#include <linux/types.h> 35#include <linux/types.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8bde7f98e58a..4e36aa7cb3d2 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $
34 */ 32 */
35 33
36#include <linux/errno.h> 34#include <linux/errno.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 8b7e83e6e88f..640449582aba 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $
35 */ 33 */
36 34
37#include <linux/string.h> 35#include <linux/string.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 200cf13fc9bb..fb9f91b60f30 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $
35 */ 33 */
36 34
37#include <linux/module.h> 35#include <linux/module.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index a8ad072be074..3f5f94879208 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $
33 */ 31 */
34 32
35#include <linux/string.h> 33#include <linux/string.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index d5862e5d99a0..1f7d1a29d2a8 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id$
35 */ 33 */
36 34
37#include <linux/mm.h> 35#include <linux/mm.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index a1ab06847b75..da9b8f9b884f 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id$
35 */ 33 */
36 34
37#ifndef MTHCA_MEMFREE_H 35#ifndef MTHCA_MEMFREE_H
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 820205dec560..8489b1e81c0f 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#include <linux/slab.h> 34#include <linux/slab.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index c1e950764bd8..266f14e47406 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $
35 */ 33 */
36 34
37#include <linux/errno.h> 35#include <linux/errno.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 605a8d57fac6..d168c2540611 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#include <linux/module.h> 34#include <linux/module.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h
index e76cb62d8e32..62b009cc8730 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.h
+++ b/drivers/infiniband/hw/mthca/mthca_profile.h
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#ifndef MTHCA_PROFILE_H 34#ifndef MTHCA_PROFILE_H
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index be34f99ca625..87ad889e367b 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -32,8 +32,6 @@
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 *
36 * $Id: mthca_provider.c 4859 2006-01-09 21:55:10Z roland $
37 */ 35 */
38 36
39#include <rdma/ib_smi.h> 37#include <rdma/ib_smi.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 934bf9544037..c621f8794b88 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $
35 */ 33 */
36 34
37#ifndef MTHCA_PROVIDER_H 35#ifndef MTHCA_PROVIDER_H
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 09dc3614cf2c..f5081bfde6db 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -31,8 +31,6 @@
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE. 33 * SOFTWARE.
34 *
35 * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $
36 */ 34 */
37 35
38#include <linux/string.h> 36#include <linux/string.h>
@@ -850,23 +848,6 @@ out:
850 return err; 848 return err;
851} 849}
852 850
853static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 };
854static const int dummy_init_attr_mask[] = {
855 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
856 IB_QP_PORT |
857 IB_QP_QKEY),
858 [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
859 IB_QP_PORT |
860 IB_QP_ACCESS_FLAGS),
861 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
862 IB_QP_PORT |
863 IB_QP_ACCESS_FLAGS),
864 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
865 IB_QP_QKEY),
866 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
867 IB_QP_QKEY),
868};
869
870int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, 851int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
871 struct ib_udata *udata) 852 struct ib_udata *udata)
872{ 853{
@@ -928,15 +909,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
928 goto out; 909 goto out;
929 } 910 }
930 911
931 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
932 err = __mthca_modify_qp(ibqp, &dummy_init_attr,
933 dummy_init_attr_mask[ibqp->qp_type],
934 IB_QPS_RESET, IB_QPS_INIT);
935 if (err)
936 goto out;
937 cur_state = IB_QPS_INIT;
938 }
939
940 err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); 912 err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
941 913
942out: 914out:
@@ -1277,10 +1249,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
1277 return -EINVAL; 1249 return -EINVAL;
1278 1250
1279 /* 1251 /*
1280 * For MLX transport we need 2 extra S/G entries: 1252 * For MLX transport we need 2 extra send gather entries:
1281 * one for the header and one for the checksum at the end 1253 * one for the header and one for the checksum at the end
1282 */ 1254 */
1283 if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg) 1255 if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg)
1284 return -EINVAL; 1256 return -EINVAL;
1285 1257
1286 if (mthca_is_memfree(dev)) { 1258 if (mthca_is_memfree(dev)) {
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 91934f2d9dba..acb6817f6060 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $
33 */ 31 */
34 32
35#include <linux/init.h> 33#include <linux/init.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index a5ffff6e1026..4fabe62aab8a 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $
33 */ 31 */
34 32
35#include <linux/slab.h> 33#include <linux/slab.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
index 8b728486410d..ca5900c96fcf 100644
--- a/drivers/infiniband/hw/mthca/mthca_uar.c
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id$
33 */ 31 */
34 32
35#include <asm/page.h> /* PAGE_SHIFT */ 33#include <asm/page.h> /* PAGE_SHIFT */
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index e1262c942db8..5fe56e810739 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -29,7 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 */ 32 */
34 33
35#ifndef MTHCA_USER_H 34#ifndef MTHCA_USER_H
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h
index b3551a8dea1d..341a5ae881c1 100644
--- a/drivers/infiniband/hw/mthca/mthca_wqe.h
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $
33 */ 31 */
34 32
35#ifndef MTHCA_WQE_H 33#ifndef MTHCA_WQE_H
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index a4e9269a29bd..d2884e778098 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -328,7 +328,7 @@ void nes_rem_ref(struct ib_qp *ibqp)
328 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); 328 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
329 u64temp = (u64)nesqp->nesqp_context_pbase; 329 u64temp = (u64)nesqp->nesqp_context_pbase;
330 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); 330 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
331 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 331 nes_post_cqp_request(nesdev, cqp_request);
332 } 332 }
333} 333}
334 334
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 61b46e9c7d2d..39bd897b40c6 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -94,9 +94,6 @@
94 94
95#define MAX_DPC_ITERATIONS 128 95#define MAX_DPC_ITERATIONS 128
96 96
97#define NES_CQP_REQUEST_NO_DOORBELL_RING 0
98#define NES_CQP_REQUEST_RING_DOORBELL 1
99
100#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001 97#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001
101#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002 98#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002
102#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 99#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
@@ -538,7 +535,11 @@ void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
538void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16); 535void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16);
539void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16); 536void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16);
540struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); 537struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
541void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int); 538void nes_free_cqp_request(struct nes_device *nesdev,
539 struct nes_cqp_request *cqp_request);
540void nes_put_cqp_request(struct nes_device *nesdev,
541 struct nes_cqp_request *cqp_request);
542void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *);
542int nes_arp_table(struct nes_device *, u32, u8 *, u32); 543int nes_arp_table(struct nes_device *, u32, u8 *, u32);
543void nes_mh_fix(unsigned long); 544void nes_mh_fix(unsigned long);
544void nes_clc(unsigned long); 545void nes_clc(unsigned long);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 9a4b40fae40d..6aa531d5276d 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1603,7 +1603,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
1603 return NULL; 1603 return NULL;
1604 } 1604 }
1605 1605
1606 memset(listener, 0, sizeof(struct nes_cm_listener));
1607 listener->loc_addr = htonl(cm_info->loc_addr); 1606 listener->loc_addr = htonl(cm_info->loc_addr);
1608 listener->loc_port = htons(cm_info->loc_port); 1607 listener->loc_port = htons(cm_info->loc_port);
1609 listener->reused_node = 0; 1608 listener->reused_node = 0;
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index d3278f111ca7..85f26d19a32b 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -398,7 +398,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
398 nesadapter->base_pd = 1; 398 nesadapter->base_pd = 1;
399 399
400 nesadapter->device_cap_flags = 400 nesadapter->device_cap_flags =
401 IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW; 401 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
402 402
403 nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter) 403 nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
404 [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]); 404 [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@@ -2710,39 +2710,11 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
2710 barrier(); 2710 barrier();
2711 cqp_request->request_done = 1; 2711 cqp_request->request_done = 1;
2712 wake_up(&cqp_request->waitq); 2712 wake_up(&cqp_request->waitq);
2713 if (atomic_dec_and_test(&cqp_request->refcount)) { 2713 nes_put_cqp_request(nesdev, cqp_request);
2714 nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
2715 cqp_request,
2716 le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f);
2717 if (cqp_request->dynamic) {
2718 kfree(cqp_request);
2719 } else {
2720 spin_lock_irqsave(&nesdev->cqp.lock, flags);
2721 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
2722 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
2723 }
2724 }
2725 } else if (cqp_request->callback) {
2726 /* Envoke the callback routine */
2727 cqp_request->cqp_callback(nesdev, cqp_request);
2728 if (cqp_request->dynamic) {
2729 kfree(cqp_request);
2730 } else {
2731 spin_lock_irqsave(&nesdev->cqp.lock, flags);
2732 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
2733 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
2734 }
2735 } else { 2714 } else {
2736 nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", 2715 if (cqp_request->callback)
2737 cqp_request, 2716 cqp_request->cqp_callback(nesdev, cqp_request);
2738 le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); 2717 nes_free_cqp_request(nesdev, cqp_request);
2739 if (cqp_request->dynamic) {
2740 kfree(cqp_request);
2741 } else {
2742 spin_lock_irqsave(&nesdev->cqp.lock, flags);
2743 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
2744 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
2745 }
2746 } 2718 }
2747 } else { 2719 } else {
2748 wake_up(&nesdev->cqp.waitq); 2720 wake_up(&nesdev->cqp.waitq);
@@ -3149,7 +3121,6 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
3149{ 3121{
3150 struct nes_device *nesdev = nesvnic->nesdev; 3122 struct nes_device *nesdev = nesvnic->nesdev;
3151 struct nes_hw_cqp_wqe *cqp_wqe; 3123 struct nes_hw_cqp_wqe *cqp_wqe;
3152 unsigned long flags;
3153 struct nes_cqp_request *cqp_request; 3124 struct nes_cqp_request *cqp_request;
3154 int ret = 0; 3125 int ret = 0;
3155 u16 major_code; 3126 u16 major_code;
@@ -3176,7 +3147,7 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
3176 nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n"); 3147 nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n");
3177 3148
3178 atomic_set(&cqp_request->refcount, 2); 3149 atomic_set(&cqp_request->refcount, 2);
3179 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 3150 nes_post_cqp_request(nesdev, cqp_request);
3180 3151
3181 if (add_port == NES_MANAGE_APBVT_ADD) 3152 if (add_port == NES_MANAGE_APBVT_ADD)
3182 ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), 3153 ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
@@ -3184,15 +3155,9 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
3184 nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n", 3155 nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n",
3185 ret, cqp_request->major_code, cqp_request->minor_code); 3156 ret, cqp_request->major_code, cqp_request->minor_code);
3186 major_code = cqp_request->major_code; 3157 major_code = cqp_request->major_code;
3187 if (atomic_dec_and_test(&cqp_request->refcount)) { 3158
3188 if (cqp_request->dynamic) { 3159 nes_put_cqp_request(nesdev, cqp_request);
3189 kfree(cqp_request); 3160
3190 } else {
3191 spin_lock_irqsave(&nesdev->cqp.lock, flags);
3192 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
3193 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
3194 }
3195 }
3196 if (!ret) 3161 if (!ret)
3197 return -ETIME; 3162 return -ETIME;
3198 else if (major_code) 3163 else if (major_code)
@@ -3252,7 +3217,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
3252 nesdev->cqp.sq_head, nesdev->cqp.sq_tail); 3217 nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
3253 3218
3254 atomic_set(&cqp_request->refcount, 1); 3219 atomic_set(&cqp_request->refcount, 1);
3255 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 3220 nes_post_cqp_request(nesdev, cqp_request);
3256} 3221}
3257 3222
3258 3223
@@ -3262,7 +3227,6 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
3262void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, 3227void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
3263 u32 which_wq, u32 wait_completion) 3228 u32 which_wq, u32 wait_completion)
3264{ 3229{
3265 unsigned long flags;
3266 struct nes_cqp_request *cqp_request; 3230 struct nes_cqp_request *cqp_request;
3267 struct nes_hw_cqp_wqe *cqp_wqe; 3231 struct nes_hw_cqp_wqe *cqp_wqe;
3268 int ret; 3232 int ret;
@@ -3285,7 +3249,7 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
3285 cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq); 3249 cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
3286 cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); 3250 cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
3287 3251
3288 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 3252 nes_post_cqp_request(nesdev, cqp_request);
3289 3253
3290 if (wait_completion) { 3254 if (wait_completion) {
3291 /* Wait for CQP */ 3255 /* Wait for CQP */
@@ -3294,14 +3258,6 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
3294 nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u," 3258 nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u,"
3295 " CQP Major:Minor codes = 0x%04X:0x%04X\n", 3259 " CQP Major:Minor codes = 0x%04X:0x%04X\n",
3296 ret, cqp_request->major_code, cqp_request->minor_code); 3260 ret, cqp_request->major_code, cqp_request->minor_code);
3297 if (atomic_dec_and_test(&cqp_request->refcount)) { 3261 nes_put_cqp_request(nesdev, cqp_request);
3298 if (cqp_request->dynamic) {
3299 kfree(cqp_request);
3300 } else {
3301 spin_lock_irqsave(&nesdev->cqp.lock, flags);
3302 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
3303 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
3304 }
3305 }
3306 } 3262 }
3307} 3263}
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index 745bf94f3f07..7b81e0ae0076 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1172,7 +1172,7 @@ struct nes_vnic {
1172 u32 mcrq_qp_id; 1172 u32 mcrq_qp_id;
1173 struct nes_ucontext *mcrq_ucontext; 1173 struct nes_ucontext *mcrq_ucontext;
1174 struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev); 1174 struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev);
1175 void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *, int); 1175 void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *);
1176 int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr ); 1176 int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr );
1177 struct net_device_stats netstats; 1177 struct net_device_stats netstats;
1178 /* used to put the netdev on the adapters logical port list */ 1178 /* used to put the netdev on the adapters logical port list */
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index fe83d1b2b177..fb8cbd71a2ef 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -567,12 +567,36 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
567 return cqp_request; 567 return cqp_request;
568} 568}
569 569
570void nes_free_cqp_request(struct nes_device *nesdev,
571 struct nes_cqp_request *cqp_request)
572{
573 unsigned long flags;
574
575 nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
576 cqp_request,
577 le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
578
579 if (cqp_request->dynamic) {
580 kfree(cqp_request);
581 } else {
582 spin_lock_irqsave(&nesdev->cqp.lock, flags);
583 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
584 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
585 }
586}
587
588void nes_put_cqp_request(struct nes_device *nesdev,
589 struct nes_cqp_request *cqp_request)
590{
591 if (atomic_dec_and_test(&cqp_request->refcount))
592 nes_free_cqp_request(nesdev, cqp_request);
593}
570 594
571/** 595/**
572 * nes_post_cqp_request 596 * nes_post_cqp_request
573 */ 597 */
574void nes_post_cqp_request(struct nes_device *nesdev, 598void nes_post_cqp_request(struct nes_device *nesdev,
575 struct nes_cqp_request *cqp_request, int ring_doorbell) 599 struct nes_cqp_request *cqp_request)
576{ 600{
577 struct nes_hw_cqp_wqe *cqp_wqe; 601 struct nes_hw_cqp_wqe *cqp_wqe;
578 unsigned long flags; 602 unsigned long flags;
@@ -600,10 +624,9 @@ void nes_post_cqp_request(struct nes_device *nesdev,
600 nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, 624 nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
601 cqp_request->waiting, atomic_read(&cqp_request->refcount)); 625 cqp_request->waiting, atomic_read(&cqp_request->refcount));
602 barrier(); 626 barrier();
603 if (ring_doorbell) { 627
604 /* Ring doorbell (1 WQEs) */ 628 /* Ring doorbell (1 WQEs) */
605 nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); 629 nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
606 }
607 630
608 barrier(); 631 barrier();
609 } else { 632 } else {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index d617da9bd351..e3939d13484e 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -55,7 +55,6 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
55 * nes_alloc_mw 55 * nes_alloc_mw
56 */ 56 */
57static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { 57static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
58 unsigned long flags;
59 struct nes_pd *nespd = to_nespd(ibpd); 58 struct nes_pd *nespd = to_nespd(ibpd);
60 struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); 59 struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
61 struct nes_device *nesdev = nesvnic->nesdev; 60 struct nes_device *nesdev = nesvnic->nesdev;
@@ -119,7 +118,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
119 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); 118 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
120 119
121 atomic_set(&cqp_request->refcount, 2); 120 atomic_set(&cqp_request->refcount, 2);
122 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 121 nes_post_cqp_request(nesdev, cqp_request);
123 122
124 /* Wait for CQP */ 123 /* Wait for CQP */
125 ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), 124 ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
@@ -128,15 +127,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
128 " CQP Major:Minor codes = 0x%04X:0x%04X.\n", 127 " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
129 stag, ret, cqp_request->major_code, cqp_request->minor_code); 128 stag, ret, cqp_request->major_code, cqp_request->minor_code);
130 if ((!ret) || (cqp_request->major_code)) { 129 if ((!ret) || (cqp_request->major_code)) {
131 if (atomic_dec_and_test(&cqp_request->refcount)) { 130 nes_put_cqp_request(nesdev, cqp_request);
132 if (cqp_request->dynamic) {
133 kfree(cqp_request);
134 } else {
135 spin_lock_irqsave(&nesdev->cqp.lock, flags);
136 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
137 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
138 }
139 }
140 kfree(nesmr); 131 kfree(nesmr);
141 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); 132 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
142 if (!ret) { 133 if (!ret) {
@@ -144,17 +135,8 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
144 } else { 135 } else {
145 return ERR_PTR(-ENOMEM); 136 return ERR_PTR(-ENOMEM);
146 } 137 }
147 } else {
148 if (atomic_dec_and_test(&cqp_request->refcount)) {
149 if (cqp_request->dynamic) {
150 kfree(cqp_request);
151 } else {
152 spin_lock_irqsave(&nesdev->cqp.lock, flags);
153 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
154 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
155 }
156 }
157 } 138 }
139 nes_put_cqp_request(nesdev, cqp_request);
158 140
159 nesmr->ibmw.rkey = stag; 141 nesmr->ibmw.rkey = stag;
160 nesmr->mode = IWNES_MEMREG_TYPE_MW; 142 nesmr->mode = IWNES_MEMREG_TYPE_MW;
@@ -178,7 +160,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
178 struct nes_hw_cqp_wqe *cqp_wqe; 160 struct nes_hw_cqp_wqe *cqp_wqe;
179 struct nes_cqp_request *cqp_request; 161 struct nes_cqp_request *cqp_request;
180 int err = 0; 162 int err = 0;
181 unsigned long flags;
182 int ret; 163 int ret;
183 164
184 /* Deallocate the window with the adapter */ 165 /* Deallocate the window with the adapter */
@@ -194,7 +175,7 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
194 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey); 175 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey);
195 176
196 atomic_set(&cqp_request->refcount, 2); 177 atomic_set(&cqp_request->refcount, 2);
197 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 178 nes_post_cqp_request(nesdev, cqp_request);
198 179
199 /* Wait for CQP */ 180 /* Wait for CQP */
200 nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n", 181 nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n",
@@ -204,32 +185,12 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
204 nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u," 185 nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u,"
205 " CQP Major:Minor codes = 0x%04X:0x%04X.\n", 186 " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
206 ret, cqp_request->major_code, cqp_request->minor_code); 187 ret, cqp_request->major_code, cqp_request->minor_code);
207 if ((!ret) || (cqp_request->major_code)) { 188 if (!ret)
208 if (atomic_dec_and_test(&cqp_request->refcount)) { 189 err = -ETIME;
209 if (cqp_request->dynamic) { 190 else if (cqp_request->major_code)
210 kfree(cqp_request); 191 err = -EIO;
211 } else { 192
212 spin_lock_irqsave(&nesdev->cqp.lock, flags); 193 nes_put_cqp_request(nesdev, cqp_request);
213 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
214 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
215 }
216 }
217 if (!ret) {
218 err = -ETIME;
219 } else {
220 err = -EIO;
221 }
222 } else {
223 if (atomic_dec_and_test(&cqp_request->refcount)) {
224 if (cqp_request->dynamic) {
225 kfree(cqp_request);
226 } else {
227 spin_lock_irqsave(&nesdev->cqp.lock, flags);
228 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
229 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
230 }
231 }
232 }
233 194
234 nes_free_resource(nesadapter, nesadapter->allocated_mrs, 195 nes_free_resource(nesadapter, nesadapter->allocated_mrs,
235 (ibmw->rkey & 0x0fffff00) >> 8); 196 (ibmw->rkey & 0x0fffff00) >> 8);
@@ -516,7 +477,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
516 (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used); 477 (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used);
517 478
518 atomic_set(&cqp_request->refcount, 2); 479 atomic_set(&cqp_request->refcount, 2);
519 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 480 nes_post_cqp_request(nesdev, cqp_request);
520 481
521 /* Wait for CQP */ 482 /* Wait for CQP */
522 ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), 483 ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
@@ -526,29 +487,11 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
526 stag, ret, cqp_request->major_code, cqp_request->minor_code); 487 stag, ret, cqp_request->major_code, cqp_request->minor_code);
527 488
528 if ((!ret) || (cqp_request->major_code)) { 489 if ((!ret) || (cqp_request->major_code)) {
529 if (atomic_dec_and_test(&cqp_request->refcount)) { 490 nes_put_cqp_request(nesdev, cqp_request);
530 if (cqp_request->dynamic) {
531 kfree(cqp_request);
532 } else {
533 spin_lock_irqsave(&nesdev->cqp.lock, flags);
534 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
535 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
536 }
537 }
538 ret = (!ret) ? -ETIME : -EIO; 491 ret = (!ret) ? -ETIME : -EIO;
539 goto failed_leaf_vpbl_pages_alloc; 492 goto failed_leaf_vpbl_pages_alloc;
540 } else {
541 if (atomic_dec_and_test(&cqp_request->refcount)) {
542 if (cqp_request->dynamic) {
543 kfree(cqp_request);
544 } else {
545 spin_lock_irqsave(&nesdev->cqp.lock, flags);
546 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
547 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
548 }
549 }
550 } 493 }
551 494 nes_put_cqp_request(nesdev, cqp_request);
552 nesfmr->nesmr.ibfmr.lkey = stag; 495 nesfmr->nesmr.ibfmr.lkey = stag;
553 nesfmr->nesmr.ibfmr.rkey = stag; 496 nesfmr->nesmr.ibfmr.rkey = stag;
554 nesfmr->attr = *ibfmr_attr; 497 nesfmr->attr = *ibfmr_attr;
@@ -1474,7 +1417,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1474 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); 1417 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
1475 1418
1476 atomic_set(&cqp_request->refcount, 2); 1419 atomic_set(&cqp_request->refcount, 2);
1477 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 1420 nes_post_cqp_request(nesdev, cqp_request);
1478 1421
1479 /* Wait for CQP */ 1422 /* Wait for CQP */
1480 nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n", 1423 nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n",
@@ -1487,15 +1430,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1487 nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, 1430 nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail,
1488 cqp_request->major_code, cqp_request->minor_code); 1431 cqp_request->major_code, cqp_request->minor_code);
1489 if ((!ret) || (cqp_request->major_code)) { 1432 if ((!ret) || (cqp_request->major_code)) {
1490 if (atomic_dec_and_test(&cqp_request->refcount)) { 1433 nes_put_cqp_request(nesdev, cqp_request);
1491 if (cqp_request->dynamic) {
1492 kfree(cqp_request);
1493 } else {
1494 spin_lock_irqsave(&nesdev->cqp.lock, flags);
1495 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1496 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
1497 }
1498 }
1499 nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); 1434 nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
1500 nes_free_qp_mem(nesdev, nesqp,virt_wqs); 1435 nes_free_qp_mem(nesdev, nesqp,virt_wqs);
1501 kfree(nesqp->allocated_buffer); 1436 kfree(nesqp->allocated_buffer);
@@ -1504,18 +1439,10 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1504 } else { 1439 } else {
1505 return ERR_PTR(-EIO); 1440 return ERR_PTR(-EIO);
1506 } 1441 }
1507 } else {
1508 if (atomic_dec_and_test(&cqp_request->refcount)) {
1509 if (cqp_request->dynamic) {
1510 kfree(cqp_request);
1511 } else {
1512 spin_lock_irqsave(&nesdev->cqp.lock, flags);
1513 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1514 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
1515 }
1516 }
1517 } 1442 }
1518 1443
1444 nes_put_cqp_request(nesdev, cqp_request);
1445
1519 if (ibpd->uobject) { 1446 if (ibpd->uobject) {
1520 uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; 1447 uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
1521 uresp.actual_sq_size = sq_size; 1448 uresp.actual_sq_size = sq_size;
@@ -1817,7 +1744,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
1817 cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); 1744 cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
1818 1745
1819 atomic_set(&cqp_request->refcount, 2); 1746 atomic_set(&cqp_request->refcount, 2);
1820 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 1747 nes_post_cqp_request(nesdev, cqp_request);
1821 1748
1822 /* Wait for CQP */ 1749 /* Wait for CQP */
1823 nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n", 1750 nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n",
@@ -1827,32 +1754,15 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
1827 nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n", 1754 nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n",
1828 nescq->hw_cq.cq_number, ret); 1755 nescq->hw_cq.cq_number, ret);
1829 if ((!ret) || (cqp_request->major_code)) { 1756 if ((!ret) || (cqp_request->major_code)) {
1830 if (atomic_dec_and_test(&cqp_request->refcount)) { 1757 nes_put_cqp_request(nesdev, cqp_request);
1831 if (cqp_request->dynamic) {
1832 kfree(cqp_request);
1833 } else {
1834 spin_lock_irqsave(&nesdev->cqp.lock, flags);
1835 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1836 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
1837 }
1838 }
1839 if (!context) 1758 if (!context)
1840 pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, 1759 pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
1841 nescq->hw_cq.cq_pbase); 1760 nescq->hw_cq.cq_pbase);
1842 nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); 1761 nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
1843 kfree(nescq); 1762 kfree(nescq);
1844 return ERR_PTR(-EIO); 1763 return ERR_PTR(-EIO);
1845 } else {
1846 if (atomic_dec_and_test(&cqp_request->refcount)) {
1847 if (cqp_request->dynamic) {
1848 kfree(cqp_request);
1849 } else {
1850 spin_lock_irqsave(&nesdev->cqp.lock, flags);
1851 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1852 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
1853 }
1854 }
1855 } 1764 }
1765 nes_put_cqp_request(nesdev, cqp_request);
1856 1766
1857 if (context) { 1767 if (context) {
1858 /* free the nespbl */ 1768 /* free the nespbl */
@@ -1931,7 +1841,7 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
1931 (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16))); 1841 (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16)));
1932 nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); 1842 nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number);
1933 atomic_set(&cqp_request->refcount, 2); 1843 atomic_set(&cqp_request->refcount, 2);
1934 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 1844 nes_post_cqp_request(nesdev, cqp_request);
1935 1845
1936 /* Wait for CQP */ 1846 /* Wait for CQP */
1937 nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n", 1847 nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n",
@@ -1942,37 +1852,18 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
1942 " CQP Major:Minor codes = 0x%04X:0x%04X.\n", 1852 " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
1943 nescq->hw_cq.cq_number, ret, cqp_request->major_code, 1853 nescq->hw_cq.cq_number, ret, cqp_request->major_code,
1944 cqp_request->minor_code); 1854 cqp_request->minor_code);
1945 if ((!ret) || (cqp_request->major_code)) { 1855 if (!ret) {
1946 if (atomic_dec_and_test(&cqp_request->refcount)) { 1856 nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
1947 if (cqp_request->dynamic) {
1948 kfree(cqp_request);
1949 } else {
1950 spin_lock_irqsave(&nesdev->cqp.lock, flags);
1951 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1952 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
1953 }
1954 }
1955 if (!ret) {
1956 nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
1957 nescq->hw_cq.cq_number); 1857 nescq->hw_cq.cq_number);
1958 ret = -ETIME; 1858 ret = -ETIME;
1959 } else { 1859 } else if (cqp_request->major_code) {
1960 nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", 1860 nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n",
1961 nescq->hw_cq.cq_number); 1861 nescq->hw_cq.cq_number);
1962 ret = -EIO; 1862 ret = -EIO;
1963 }
1964 } else { 1863 } else {
1965 ret = 0; 1864 ret = 0;
1966 if (atomic_dec_and_test(&cqp_request->refcount)) {
1967 if (cqp_request->dynamic) {
1968 kfree(cqp_request);
1969 } else {
1970 spin_lock_irqsave(&nesdev->cqp.lock, flags);
1971 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
1972 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
1973 }
1974 }
1975 } 1865 }
1866 nes_put_cqp_request(nesdev, cqp_request);
1976 1867
1977 if (nescq->cq_mem_size) 1868 if (nescq->cq_mem_size)
1978 pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, 1869 pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size,
@@ -2096,7 +1987,7 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
2096 barrier(); 1987 barrier();
2097 1988
2098 atomic_set(&cqp_request->refcount, 2); 1989 atomic_set(&cqp_request->refcount, 2);
2099 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 1990 nes_post_cqp_request(nesdev, cqp_request);
2100 1991
2101 /* Wait for CQP */ 1992 /* Wait for CQP */
2102 ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), 1993 ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
@@ -2105,15 +1996,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
2105 " CQP Major:Minor codes = 0x%04X:0x%04X.\n", 1996 " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
2106 stag, ret, cqp_request->major_code, cqp_request->minor_code); 1997 stag, ret, cqp_request->major_code, cqp_request->minor_code);
2107 major_code = cqp_request->major_code; 1998 major_code = cqp_request->major_code;
2108 if (atomic_dec_and_test(&cqp_request->refcount)) { 1999 nes_put_cqp_request(nesdev, cqp_request);
2109 if (cqp_request->dynamic) { 2000
2110 kfree(cqp_request);
2111 } else {
2112 spin_lock_irqsave(&nesdev->cqp.lock, flags);
2113 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
2114 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
2115 }
2116 }
2117 if (!ret) 2001 if (!ret)
2118 return -ETIME; 2002 return -ETIME;
2119 else if (major_code) 2003 else if (major_code)
@@ -2754,7 +2638,7 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
2754 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey); 2638 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey);
2755 2639
2756 atomic_set(&cqp_request->refcount, 2); 2640 atomic_set(&cqp_request->refcount, 2);
2757 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 2641 nes_post_cqp_request(nesdev, cqp_request);
2758 2642
2759 /* Wait for CQP */ 2643 /* Wait for CQP */
2760 nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey); 2644 nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey);
@@ -2771,15 +2655,9 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
2771 2655
2772 major_code = cqp_request->major_code; 2656 major_code = cqp_request->major_code;
2773 minor_code = cqp_request->minor_code; 2657 minor_code = cqp_request->minor_code;
2774 if (atomic_dec_and_test(&cqp_request->refcount)) { 2658
2775 if (cqp_request->dynamic) { 2659 nes_put_cqp_request(nesdev, cqp_request);
2776 kfree(cqp_request); 2660
2777 } else {
2778 spin_lock_irqsave(&nesdev->cqp.lock, flags);
2779 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
2780 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
2781 }
2782 }
2783 if (!ret) { 2661 if (!ret) {
2784 nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag," 2662 nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag,"
2785 " ib_mr=%p, rkey = 0x%08X\n", 2663 " ib_mr=%p, rkey = 0x%08X\n",
@@ -2904,7 +2782,6 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
2904 /* struct iw_cm_id *cm_id = nesqp->cm_id; */ 2782 /* struct iw_cm_id *cm_id = nesqp->cm_id; */
2905 /* struct iw_cm_event cm_event; */ 2783 /* struct iw_cm_event cm_event; */
2906 struct nes_cqp_request *cqp_request; 2784 struct nes_cqp_request *cqp_request;
2907 unsigned long flags;
2908 int ret; 2785 int ret;
2909 u16 major_code; 2786 u16 major_code;
2910 2787
@@ -2932,7 +2809,7 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
2932 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase); 2809 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
2933 2810
2934 atomic_set(&cqp_request->refcount, 2); 2811 atomic_set(&cqp_request->refcount, 2);
2935 nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); 2812 nes_post_cqp_request(nesdev, cqp_request);
2936 2813
2937 /* Wait for CQP */ 2814 /* Wait for CQP */
2938 if (wait_completion) { 2815 if (wait_completion) {
@@ -2950,15 +2827,9 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
2950 nesqp->hwqp.qp_id, cqp_request->major_code, 2827 nesqp->hwqp.qp_id, cqp_request->major_code,
2951 cqp_request->minor_code, next_iwarp_state); 2828 cqp_request->minor_code, next_iwarp_state);
2952 } 2829 }
2953 if (atomic_dec_and_test(&cqp_request->refcount)) { 2830
2954 if (cqp_request->dynamic) { 2831 nes_put_cqp_request(nesdev, cqp_request);
2955 kfree(cqp_request); 2832
2956 } else {
2957 spin_lock_irqsave(&nesdev->cqp.lock, flags);
2958 list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
2959 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
2960 }
2961 }
2962 if (!ret) 2833 if (!ret)
2963 return -ETIME; 2834 return -ETIME;
2964 else if (major_code) 2835 else if (major_code)
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 1f76bad020f3..691525cf394a 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,7 @@
1config INFINIBAND_IPOIB 1config INFINIBAND_IPOIB
2 tristate "IP-over-InfiniBand" 2 tristate "IP-over-InfiniBand"
3 depends on NETDEVICES && INET && (IPV6 || IPV6=n) 3 depends on NETDEVICES && INET && (IPV6 || IPV6=n)
4 select INET_LRO
4 ---help--- 5 ---help---
5 Support for the IP-over-InfiniBand protocol (IPoIB). This 6 Support for the IP-over-InfiniBand protocol (IPoIB). This
6 transports IP packets over InfiniBand so you can use your IB 7 transports IP packets over InfiniBand so you can use your IB
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ca126fc2b853..b0ffc9abe8c0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: ipoib.h 1358 2004-12-17 22:00:11Z roland $
35 */ 33 */
36 34
37#ifndef _IPOIB_H 35#ifndef _IPOIB_H
@@ -52,9 +50,16 @@
52#include <rdma/ib_verbs.h> 50#include <rdma/ib_verbs.h>
53#include <rdma/ib_pack.h> 51#include <rdma/ib_pack.h>
54#include <rdma/ib_sa.h> 52#include <rdma/ib_sa.h>
53#include <linux/inet_lro.h>
55 54
56/* constants */ 55/* constants */
57 56
57enum ipoib_flush_level {
58 IPOIB_FLUSH_LIGHT,
59 IPOIB_FLUSH_NORMAL,
60 IPOIB_FLUSH_HEAVY
61};
62
58enum { 63enum {
59 IPOIB_ENCAP_LEN = 4, 64 IPOIB_ENCAP_LEN = 4,
60 65
@@ -65,8 +70,8 @@ enum {
65 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, 70 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
66 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, 71 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
67 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE, 72 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
68 IPOIB_RX_RING_SIZE = 128, 73 IPOIB_RX_RING_SIZE = 256,
69 IPOIB_TX_RING_SIZE = 64, 74 IPOIB_TX_RING_SIZE = 128,
70 IPOIB_MAX_QUEUE_SIZE = 8192, 75 IPOIB_MAX_QUEUE_SIZE = 8192,
71 IPOIB_MIN_QUEUE_SIZE = 2, 76 IPOIB_MIN_QUEUE_SIZE = 2,
72 IPOIB_CM_MAX_CONN_QP = 4096, 77 IPOIB_CM_MAX_CONN_QP = 4096,
@@ -84,7 +89,6 @@ enum {
84 IPOIB_FLAG_SUBINTERFACE = 5, 89 IPOIB_FLAG_SUBINTERFACE = 5,
85 IPOIB_MCAST_RUN = 6, 90 IPOIB_MCAST_RUN = 6,
86 IPOIB_STOP_REAPER = 7, 91 IPOIB_STOP_REAPER = 7,
87 IPOIB_MCAST_STARTED = 8,
88 IPOIB_FLAG_ADMIN_CM = 9, 92 IPOIB_FLAG_ADMIN_CM = 9,
89 IPOIB_FLAG_UMCAST = 10, 93 IPOIB_FLAG_UMCAST = 10,
90 IPOIB_FLAG_CSUM = 11, 94 IPOIB_FLAG_CSUM = 11,
@@ -96,7 +100,11 @@ enum {
96 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ 100 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
97 IPOIB_MCAST_FLAG_ATTACHED = 3, 101 IPOIB_MCAST_FLAG_ATTACHED = 3,
98 102
103 IPOIB_MAX_LRO_DESCRIPTORS = 8,
104 IPOIB_LRO_MAX_AGGR = 64,
105
99 MAX_SEND_CQE = 16, 106 MAX_SEND_CQE = 16,
107 IPOIB_CM_COPYBREAK = 256,
100}; 108};
101 109
102#define IPOIB_OP_RECV (1ul << 31) 110#define IPOIB_OP_RECV (1ul << 31)
@@ -149,6 +157,11 @@ struct ipoib_tx_buf {
149 u64 mapping[MAX_SKB_FRAGS + 1]; 157 u64 mapping[MAX_SKB_FRAGS + 1];
150}; 158};
151 159
160struct ipoib_cm_tx_buf {
161 struct sk_buff *skb;
162 u64 mapping;
163};
164
152struct ib_cm_id; 165struct ib_cm_id;
153 166
154struct ipoib_cm_data { 167struct ipoib_cm_data {
@@ -207,7 +220,7 @@ struct ipoib_cm_tx {
207 struct net_device *dev; 220 struct net_device *dev;
208 struct ipoib_neigh *neigh; 221 struct ipoib_neigh *neigh;
209 struct ipoib_path *path; 222 struct ipoib_path *path;
210 struct ipoib_tx_buf *tx_ring; 223 struct ipoib_cm_tx_buf *tx_ring;
211 unsigned tx_head; 224 unsigned tx_head;
212 unsigned tx_tail; 225 unsigned tx_tail;
213 unsigned long flags; 226 unsigned long flags;
@@ -249,6 +262,11 @@ struct ipoib_ethtool_st {
249 u16 max_coalesced_frames; 262 u16 max_coalesced_frames;
250}; 263};
251 264
265struct ipoib_lro {
266 struct net_lro_mgr lro_mgr;
267 struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS];
268};
269
252/* 270/*
253 * Device private locking: tx_lock protects members used in TX fast 271 * Device private locking: tx_lock protects members used in TX fast
254 * path (and we use LLTX so upper layers don't do extra locking). 272 * path (and we use LLTX so upper layers don't do extra locking).
@@ -264,7 +282,6 @@ struct ipoib_dev_priv {
264 282
265 unsigned long flags; 283 unsigned long flags;
266 284
267 struct mutex mcast_mutex;
268 struct mutex vlan_mutex; 285 struct mutex vlan_mutex;
269 286
270 struct rb_root path_tree; 287 struct rb_root path_tree;
@@ -276,10 +293,11 @@ struct ipoib_dev_priv {
276 293
277 struct delayed_work pkey_poll_task; 294 struct delayed_work pkey_poll_task;
278 struct delayed_work mcast_task; 295 struct delayed_work mcast_task;
279 struct work_struct flush_task; 296 struct work_struct flush_light;
297 struct work_struct flush_normal;
298 struct work_struct flush_heavy;
280 struct work_struct restart_task; 299 struct work_struct restart_task;
281 struct delayed_work ah_reap_task; 300 struct delayed_work ah_reap_task;
282 struct work_struct pkey_event_task;
283 301
284 struct ib_device *ca; 302 struct ib_device *ca;
285 u8 port; 303 u8 port;
@@ -335,6 +353,8 @@ struct ipoib_dev_priv {
335 int hca_caps; 353 int hca_caps;
336 struct ipoib_ethtool_st ethtool; 354 struct ipoib_ethtool_st ethtool;
337 struct timer_list poll_timer; 355 struct timer_list poll_timer;
356
357 struct ipoib_lro lro;
338}; 358};
339 359
340struct ipoib_ah { 360struct ipoib_ah {
@@ -359,6 +379,7 @@ struct ipoib_path {
359 379
360 struct rb_node rb_node; 380 struct rb_node rb_node;
361 struct list_head list; 381 struct list_head list;
382 int valid;
362}; 383};
363 384
364struct ipoib_neigh { 385struct ipoib_neigh {
@@ -423,11 +444,14 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
423 struct ipoib_ah *address, u32 qpn); 444 struct ipoib_ah *address, u32 qpn);
424void ipoib_reap_ah(struct work_struct *work); 445void ipoib_reap_ah(struct work_struct *work);
425 446
447void ipoib_mark_paths_invalid(struct net_device *dev);
426void ipoib_flush_paths(struct net_device *dev); 448void ipoib_flush_paths(struct net_device *dev);
427struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); 449struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
428 450
429int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 451int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
430void ipoib_ib_dev_flush(struct work_struct *work); 452void ipoib_ib_dev_flush_light(struct work_struct *work);
453void ipoib_ib_dev_flush_normal(struct work_struct *work);
454void ipoib_ib_dev_flush_heavy(struct work_struct *work);
431void ipoib_pkey_event(struct work_struct *work); 455void ipoib_pkey_event(struct work_struct *work);
432void ipoib_ib_dev_cleanup(struct net_device *dev); 456void ipoib_ib_dev_cleanup(struct net_device *dev);
433 457
@@ -466,9 +490,7 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
466#endif 490#endif
467 491
468int ipoib_mcast_attach(struct net_device *dev, u16 mlid, 492int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
469 union ib_gid *mgid); 493 union ib_gid *mgid, int set_qkey);
470int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
471 union ib_gid *mgid);
472 494
473int ipoib_init_qp(struct net_device *dev); 495int ipoib_init_qp(struct net_device *dev);
474int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); 496int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 97e67d36378f..0f2d3045061a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id$
33 */ 31 */
34 32
35#include <rdma/ib_cm.h> 33#include <rdma/ib_cm.h>
@@ -113,18 +111,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
113} 111}
114 112
115static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, 113static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
116 struct ipoib_cm_rx *rx, int id) 114 struct ipoib_cm_rx *rx,
115 struct ib_recv_wr *wr,
116 struct ib_sge *sge, int id)
117{ 117{
118 struct ipoib_dev_priv *priv = netdev_priv(dev); 118 struct ipoib_dev_priv *priv = netdev_priv(dev);
119 struct ib_recv_wr *bad_wr; 119 struct ib_recv_wr *bad_wr;
120 int i, ret; 120 int i, ret;
121 121
122 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 122 wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
123 123
124 for (i = 0; i < IPOIB_CM_RX_SG; ++i) 124 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
125 priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i]; 125 sge[i].addr = rx->rx_ring[id].mapping[i];
126 126
127 ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr); 127 ret = ib_post_recv(rx->qp, wr, &bad_wr);
128 if (unlikely(ret)) { 128 if (unlikely(ret)) {
129 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); 129 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
130 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, 130 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
@@ -322,10 +322,33 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
322 return 0; 322 return 0;
323} 323}
324 324
325static void ipoib_cm_init_rx_wr(struct net_device *dev,
326 struct ib_recv_wr *wr,
327 struct ib_sge *sge)
328{
329 struct ipoib_dev_priv *priv = netdev_priv(dev);
330 int i;
331
332 for (i = 0; i < priv->cm.num_frags; ++i)
333 sge[i].lkey = priv->mr->lkey;
334
335 sge[0].length = IPOIB_CM_HEAD_SIZE;
336 for (i = 1; i < priv->cm.num_frags; ++i)
337 sge[i].length = PAGE_SIZE;
338
339 wr->next = NULL;
340 wr->sg_list = priv->cm.rx_sge;
341 wr->num_sge = priv->cm.num_frags;
342}
343
325static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, 344static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
326 struct ipoib_cm_rx *rx) 345 struct ipoib_cm_rx *rx)
327{ 346{
328 struct ipoib_dev_priv *priv = netdev_priv(dev); 347 struct ipoib_dev_priv *priv = netdev_priv(dev);
348 struct {
349 struct ib_recv_wr wr;
350 struct ib_sge sge[IPOIB_CM_RX_SG];
351 } *t;
329 int ret; 352 int ret;
330 int i; 353 int i;
331 354
@@ -333,6 +356,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
333 if (!rx->rx_ring) 356 if (!rx->rx_ring)
334 return -ENOMEM; 357 return -ENOMEM;
335 358
359 t = kmalloc(sizeof *t, GFP_KERNEL);
360 if (!t) {
361 ret = -ENOMEM;
362 goto err_free;
363 }
364
365 ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
366
336 spin_lock_irq(&priv->lock); 367 spin_lock_irq(&priv->lock);
337 368
338 if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { 369 if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
@@ -351,8 +382,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
351 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 382 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
352 ret = -ENOMEM; 383 ret = -ENOMEM;
353 goto err_count; 384 goto err_count;
354 } 385 }
355 ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); 386 ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
356 if (ret) { 387 if (ret) {
357 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " 388 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
358 "failed for buf %d\n", i); 389 "failed for buf %d\n", i);
@@ -363,6 +394,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
363 394
364 rx->recv_count = ipoib_recvq_size; 395 rx->recv_count = ipoib_recvq_size;
365 396
397 kfree(t);
398
366 return 0; 399 return 0;
367 400
368err_count: 401err_count:
@@ -371,6 +404,7 @@ err_count:
371 spin_unlock_irq(&priv->lock); 404 spin_unlock_irq(&priv->lock);
372 405
373err_free: 406err_free:
407 kfree(t);
374 ipoib_cm_free_rx_ring(dev, rx->rx_ring); 408 ipoib_cm_free_rx_ring(dev, rx->rx_ring);
375 409
376 return ret; 410 return ret;
@@ -525,6 +559,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
525 u64 mapping[IPOIB_CM_RX_SG]; 559 u64 mapping[IPOIB_CM_RX_SG];
526 int frags; 560 int frags;
527 int has_srq; 561 int has_srq;
562 struct sk_buff *small_skb;
528 563
529 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", 564 ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
530 wr_id, wc->status); 565 wr_id, wc->status);
@@ -579,6 +614,23 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
579 } 614 }
580 } 615 }
581 616
617 if (wc->byte_len < IPOIB_CM_COPYBREAK) {
618 int dlen = wc->byte_len;
619
620 small_skb = dev_alloc_skb(dlen + 12);
621 if (small_skb) {
622 skb_reserve(small_skb, 12);
623 ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
624 dlen, DMA_FROM_DEVICE);
625 skb_copy_from_linear_data(skb, small_skb->data, dlen);
626 ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
627 dlen, DMA_FROM_DEVICE);
628 skb_put(small_skb, dlen);
629 skb = small_skb;
630 goto copied;
631 }
632 }
633
582 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, 634 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
583 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; 635 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
584 636
@@ -601,6 +653,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
601 653
602 skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); 654 skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
603 655
656copied:
604 skb->protocol = ((struct ipoib_header *) skb->data)->proto; 657 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
605 skb_reset_mac_header(skb); 658 skb_reset_mac_header(skb);
606 skb_pull(skb, IPOIB_ENCAP_LEN); 659 skb_pull(skb, IPOIB_ENCAP_LEN);
@@ -620,7 +673,10 @@ repost:
620 ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " 673 ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
621 "for buf %d\n", wr_id); 674 "for buf %d\n", wr_id);
622 } else { 675 } else {
623 if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { 676 if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
677 &priv->cm.rx_wr,
678 priv->cm.rx_sge,
679 wr_id))) {
624 --p->recv_count; 680 --p->recv_count;
625 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " 681 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
626 "for buf %d\n", wr_id); 682 "for buf %d\n", wr_id);
@@ -647,7 +703,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
647void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) 703void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
648{ 704{
649 struct ipoib_dev_priv *priv = netdev_priv(dev); 705 struct ipoib_dev_priv *priv = netdev_priv(dev);
650 struct ipoib_tx_buf *tx_req; 706 struct ipoib_cm_tx_buf *tx_req;
651 u64 addr; 707 u64 addr;
652 708
653 if (unlikely(skb->len > tx->mtu)) { 709 if (unlikely(skb->len > tx->mtu)) {
@@ -678,7 +734,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
678 return; 734 return;
679 } 735 }
680 736
681 tx_req->mapping[0] = addr; 737 tx_req->mapping = addr;
682 738
683 if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), 739 if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
684 addr, skb->len))) { 740 addr, skb->len))) {
@@ -703,7 +759,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
703 struct ipoib_dev_priv *priv = netdev_priv(dev); 759 struct ipoib_dev_priv *priv = netdev_priv(dev);
704 struct ipoib_cm_tx *tx = wc->qp->qp_context; 760 struct ipoib_cm_tx *tx = wc->qp->qp_context;
705 unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; 761 unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
706 struct ipoib_tx_buf *tx_req; 762 struct ipoib_cm_tx_buf *tx_req;
707 unsigned long flags; 763 unsigned long flags;
708 764
709 ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", 765 ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
@@ -717,7 +773,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
717 773
718 tx_req = &tx->tx_ring[wr_id]; 774 tx_req = &tx->tx_ring[wr_id];
719 775
720 ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE); 776 ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
721 777
722 /* FIXME: is this right? Shouldn't we only increment on success? */ 778 /* FIXME: is this right? Shouldn't we only increment on success? */
723 ++dev->stats.tx_packets; 779 ++dev->stats.tx_packets;
@@ -1087,7 +1143,7 @@ err_tx:
1087static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) 1143static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
1088{ 1144{
1089 struct ipoib_dev_priv *priv = netdev_priv(p->dev); 1145 struct ipoib_dev_priv *priv = netdev_priv(p->dev);
1090 struct ipoib_tx_buf *tx_req; 1146 struct ipoib_cm_tx_buf *tx_req;
1091 unsigned long flags; 1147 unsigned long flags;
1092 unsigned long begin; 1148 unsigned long begin;
1093 1149
@@ -1115,7 +1171,7 @@ timeout:
1115 1171
1116 while ((int) p->tx_tail - (int) p->tx_head < 0) { 1172 while ((int) p->tx_tail - (int) p->tx_head < 0) {
1117 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; 1173 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
1118 ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, 1174 ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
1119 DMA_TO_DEVICE); 1175 DMA_TO_DEVICE);
1120 dev_kfree_skb_any(tx_req->skb); 1176 dev_kfree_skb_any(tx_req->skb);
1121 ++p->tx_tail; 1177 ++p->tx_tail;
@@ -1384,7 +1440,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
1384 ipoib_warn(priv, "enabling connected mode " 1440 ipoib_warn(priv, "enabling connected mode "
1385 "will cause multicast packet drops\n"); 1441 "will cause multicast packet drops\n");
1386 1442
1443 rtnl_lock();
1387 dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO); 1444 dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
1445 rtnl_unlock();
1388 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; 1446 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
1389 1447
1390 ipoib_flush_paths(dev); 1448 ipoib_flush_paths(dev);
@@ -1393,14 +1451,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
1393 1451
1394 if (!strcmp(buf, "datagram\n")) { 1452 if (!strcmp(buf, "datagram\n")) {
1395 clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 1453 clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
1396 dev->mtu = min(priv->mcast_mtu, dev->mtu);
1397 ipoib_flush_paths(dev);
1398 1454
1455 rtnl_lock();
1399 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) { 1456 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
1400 dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; 1457 dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
1401 if (priv->hca_caps & IB_DEVICE_UD_TSO) 1458 if (priv->hca_caps & IB_DEVICE_UD_TSO)
1402 dev->features |= NETIF_F_TSO; 1459 dev->features |= NETIF_F_TSO;
1403 } 1460 }
1461 dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
1462 rtnl_unlock();
1463 ipoib_flush_paths(dev);
1404 1464
1405 return count; 1465 return count;
1406 } 1466 }
@@ -1485,15 +1545,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
1485 priv->cm.num_frags = IPOIB_CM_RX_SG; 1545 priv->cm.num_frags = IPOIB_CM_RX_SG;
1486 } 1546 }
1487 1547
1488 for (i = 0; i < priv->cm.num_frags; ++i) 1548 ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
1489 priv->cm.rx_sge[i].lkey = priv->mr->lkey;
1490
1491 priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
1492 for (i = 1; i < priv->cm.num_frags; ++i)
1493 priv->cm.rx_sge[i].length = PAGE_SIZE;
1494 priv->cm.rx_wr.next = NULL;
1495 priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
1496 priv->cm.rx_wr.num_sge = priv->cm.num_frags;
1497 1549
1498 if (ipoib_cm_has_srq(dev)) { 1550 if (ipoib_cm_has_srq(dev)) {
1499 for (i = 0; i < ipoib_recvq_size; ++i) { 1551 for (i = 0; i < ipoib_recvq_size; ++i) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 10279b79c44d..66af5c1a76e5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev,
86 return 0; 86 return 0;
87} 87}
88 88
89static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = {
90 "LRO aggregated", "LRO flushed",
91 "LRO avg aggr", "LRO no desc"
92};
93
94static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
95{
96 switch (stringset) {
97 case ETH_SS_STATS:
98 memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys));
99 break;
100 }
101}
102
103static int ipoib_get_sset_count(struct net_device *dev, int sset)
104{
105 switch (sset) {
106 case ETH_SS_STATS:
107 return ARRAY_SIZE(ipoib_stats_keys);
108 default:
109 return -EOPNOTSUPP;
110 }
111}
112
113static void ipoib_get_ethtool_stats(struct net_device *dev,
114 struct ethtool_stats *stats, uint64_t *data)
115{
116 struct ipoib_dev_priv *priv = netdev_priv(dev);
117 int index = 0;
118
119 /* Get LRO statistics */
120 data[index++] = priv->lro.lro_mgr.stats.aggregated;
121 data[index++] = priv->lro.lro_mgr.stats.flushed;
122 if (priv->lro.lro_mgr.stats.flushed)
123 data[index++] = priv->lro.lro_mgr.stats.aggregated /
124 priv->lro.lro_mgr.stats.flushed;
125 else
126 data[index++] = 0;
127 data[index++] = priv->lro.lro_mgr.stats.no_desc;
128}
129
89static const struct ethtool_ops ipoib_ethtool_ops = { 130static const struct ethtool_ops ipoib_ethtool_ops = {
90 .get_drvinfo = ipoib_get_drvinfo, 131 .get_drvinfo = ipoib_get_drvinfo,
91 .get_tso = ethtool_op_get_tso, 132 .get_tso = ethtool_op_get_tso,
92 .get_coalesce = ipoib_get_coalesce, 133 .get_coalesce = ipoib_get_coalesce,
93 .set_coalesce = ipoib_set_coalesce, 134 .set_coalesce = ipoib_set_coalesce,
135 .get_flags = ethtool_op_get_flags,
136 .set_flags = ethtool_op_set_flags,
137 .get_strings = ipoib_get_strings,
138 .get_sset_count = ipoib_get_sset_count,
139 .get_ethtool_stats = ipoib_get_ethtool_stats,
94}; 140};
95 141
96void ipoib_set_ethtool_ops(struct net_device *dev) 142void ipoib_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 8b882bbd1d05..961c585da216 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: ipoib_fs.c 1389 2004-12-27 22:56:47Z roland $
33 */ 31 */
34 32
35#include <linux/err.h> 33#include <linux/err.h>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f429bce24c20..66cafa20c246 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -31,8 +31,6 @@
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE. 33 * SOFTWARE.
34 *
35 * $Id: ipoib_ib.c 1386 2004-12-27 16:23:17Z roland $
36 */ 34 */
37 35
38#include <linux/delay.h> 36#include <linux/delay.h>
@@ -290,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
290 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) 288 if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
291 skb->ip_summed = CHECKSUM_UNNECESSARY; 289 skb->ip_summed = CHECKSUM_UNNECESSARY;
292 290
293 netif_receive_skb(skb); 291 if (dev->features & NETIF_F_LRO)
292 lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
293 else
294 netif_receive_skb(skb);
294 295
295repost: 296repost:
296 if (unlikely(ipoib_ib_post_receive(dev, wr_id))) 297 if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
@@ -442,6 +443,9 @@ poll_more:
442 } 443 }
443 444
444 if (done < budget) { 445 if (done < budget) {
446 if (dev->features & NETIF_F_LRO)
447 lro_flush_all(&priv->lro.lro_mgr);
448
445 netif_rx_complete(dev, napi); 449 netif_rx_complete(dev, napi);
446 if (unlikely(ib_req_notify_cq(priv->recv_cq, 450 if (unlikely(ib_req_notify_cq(priv->recv_cq,
447 IB_CQ_NEXT_COMP | 451 IB_CQ_NEXT_COMP |
@@ -898,7 +902,8 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
898 return 0; 902 return 0;
899} 903}
900 904
901static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) 905static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
906 enum ipoib_flush_level level)
902{ 907{
903 struct ipoib_dev_priv *cpriv; 908 struct ipoib_dev_priv *cpriv;
904 struct net_device *dev = priv->dev; 909 struct net_device *dev = priv->dev;
@@ -911,7 +916,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
911 * the parent is down. 916 * the parent is down.
912 */ 917 */
913 list_for_each_entry(cpriv, &priv->child_intfs, list) 918 list_for_each_entry(cpriv, &priv->child_intfs, list)
914 __ipoib_ib_dev_flush(cpriv, pkey_event); 919 __ipoib_ib_dev_flush(cpriv, level);
915 920
916 mutex_unlock(&priv->vlan_mutex); 921 mutex_unlock(&priv->vlan_mutex);
917 922
@@ -925,7 +930,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
925 return; 930 return;
926 } 931 }
927 932
928 if (pkey_event) { 933 if (level == IPOIB_FLUSH_HEAVY) {
929 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { 934 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
930 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 935 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
931 ipoib_ib_dev_down(dev, 0); 936 ipoib_ib_dev_down(dev, 0);
@@ -943,11 +948,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
943 priv->pkey_index = new_index; 948 priv->pkey_index = new_index;
944 } 949 }
945 950
946 ipoib_dbg(priv, "flushing\n"); 951 if (level == IPOIB_FLUSH_LIGHT) {
952 ipoib_mark_paths_invalid(dev);
953 ipoib_mcast_dev_flush(dev);
954 }
947 955
948 ipoib_ib_dev_down(dev, 0); 956 if (level >= IPOIB_FLUSH_NORMAL)
957 ipoib_ib_dev_down(dev, 0);
949 958
950 if (pkey_event) { 959 if (level == IPOIB_FLUSH_HEAVY) {
951 ipoib_ib_dev_stop(dev, 0); 960 ipoib_ib_dev_stop(dev, 0);
952 ipoib_ib_dev_open(dev); 961 ipoib_ib_dev_open(dev);
953 } 962 }
@@ -957,27 +966,34 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
957 * we get here, don't bring it back up if it's not configured up 966 * we get here, don't bring it back up if it's not configured up
958 */ 967 */
959 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { 968 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
960 ipoib_ib_dev_up(dev); 969 if (level >= IPOIB_FLUSH_NORMAL)
970 ipoib_ib_dev_up(dev);
961 ipoib_mcast_restart_task(&priv->restart_task); 971 ipoib_mcast_restart_task(&priv->restart_task);
962 } 972 }
963} 973}
964 974
965void ipoib_ib_dev_flush(struct work_struct *work) 975void ipoib_ib_dev_flush_light(struct work_struct *work)
976{
977 struct ipoib_dev_priv *priv =
978 container_of(work, struct ipoib_dev_priv, flush_light);
979
980 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT);
981}
982
983void ipoib_ib_dev_flush_normal(struct work_struct *work)
966{ 984{
967 struct ipoib_dev_priv *priv = 985 struct ipoib_dev_priv *priv =
968 container_of(work, struct ipoib_dev_priv, flush_task); 986 container_of(work, struct ipoib_dev_priv, flush_normal);
969 987
970 ipoib_dbg(priv, "Flushing %s\n", priv->dev->name); 988 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL);
971 __ipoib_ib_dev_flush(priv, 0);
972} 989}
973 990
974void ipoib_pkey_event(struct work_struct *work) 991void ipoib_ib_dev_flush_heavy(struct work_struct *work)
975{ 992{
976 struct ipoib_dev_priv *priv = 993 struct ipoib_dev_priv *priv =
977 container_of(work, struct ipoib_dev_priv, pkey_event_task); 994 container_of(work, struct ipoib_dev_priv, flush_heavy);
978 995
979 ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name); 996 __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY);
980 __ipoib_ib_dev_flush(priv, 1);
981} 997}
982 998
983void ipoib_ib_dev_cleanup(struct net_device *dev) 999void ipoib_ib_dev_cleanup(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2442090ac8d1..8be9ea0436e6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: ipoib_main.c 1377 2004-12-23 19:57:12Z roland $
35 */ 33 */
36 34
37#include "ipoib.h" 35#include "ipoib.h"
@@ -62,6 +60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
62module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); 60module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
63MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); 61MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
64 62
63static int lro;
64module_param(lro, bool, 0444);
65MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
66
67static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
68module_param(lro_max_aggr, int, 0644);
69MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
70 "(default = 64)");
71
65#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 72#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
66int ipoib_debug_level; 73int ipoib_debug_level;
67 74
@@ -350,6 +357,23 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
350 357
351#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ 358#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
352 359
360void ipoib_mark_paths_invalid(struct net_device *dev)
361{
362 struct ipoib_dev_priv *priv = netdev_priv(dev);
363 struct ipoib_path *path, *tp;
364
365 spin_lock_irq(&priv->lock);
366
367 list_for_each_entry_safe(path, tp, &priv->path_list, list) {
368 ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n",
369 be16_to_cpu(path->pathrec.dlid),
370 IPOIB_GID_ARG(path->pathrec.dgid));
371 path->valid = 0;
372 }
373
374 spin_unlock_irq(&priv->lock);
375}
376
353void ipoib_flush_paths(struct net_device *dev) 377void ipoib_flush_paths(struct net_device *dev)
354{ 378{
355 struct ipoib_dev_priv *priv = netdev_priv(dev); 379 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -386,6 +410,7 @@ static void path_rec_completion(int status,
386 struct net_device *dev = path->dev; 410 struct net_device *dev = path->dev;
387 struct ipoib_dev_priv *priv = netdev_priv(dev); 411 struct ipoib_dev_priv *priv = netdev_priv(dev);
388 struct ipoib_ah *ah = NULL; 412 struct ipoib_ah *ah = NULL;
413 struct ipoib_ah *old_ah;
389 struct ipoib_neigh *neigh, *tn; 414 struct ipoib_neigh *neigh, *tn;
390 struct sk_buff_head skqueue; 415 struct sk_buff_head skqueue;
391 struct sk_buff *skb; 416 struct sk_buff *skb;
@@ -409,6 +434,7 @@ static void path_rec_completion(int status,
409 434
410 spin_lock_irqsave(&priv->lock, flags); 435 spin_lock_irqsave(&priv->lock, flags);
411 436
437 old_ah = path->ah;
412 path->ah = ah; 438 path->ah = ah;
413 439
414 if (ah) { 440 if (ah) {
@@ -421,6 +447,17 @@ static void path_rec_completion(int status,
421 __skb_queue_tail(&skqueue, skb); 447 __skb_queue_tail(&skqueue, skb);
422 448
423 list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) { 449 list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
450 if (neigh->ah) {
451 WARN_ON(neigh->ah != old_ah);
452 /*
453 * Dropping the ah reference inside
454 * priv->lock is safe here, because we
455 * will hold one more reference from
456 * the original value of path->ah (ie
457 * old_ah).
458 */
459 ipoib_put_ah(neigh->ah);
460 }
424 kref_get(&path->ah->ref); 461 kref_get(&path->ah->ref);
425 neigh->ah = path->ah; 462 neigh->ah = path->ah;
426 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, 463 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
@@ -443,6 +480,7 @@ static void path_rec_completion(int status,
443 while ((skb = __skb_dequeue(&neigh->queue))) 480 while ((skb = __skb_dequeue(&neigh->queue)))
444 __skb_queue_tail(&skqueue, skb); 481 __skb_queue_tail(&skqueue, skb);
445 } 482 }
483 path->valid = 1;
446 } 484 }
447 485
448 path->query = NULL; 486 path->query = NULL;
@@ -450,6 +488,9 @@ static void path_rec_completion(int status,
450 488
451 spin_unlock_irqrestore(&priv->lock, flags); 489 spin_unlock_irqrestore(&priv->lock, flags);
452 490
491 if (old_ah)
492 ipoib_put_ah(old_ah);
493
453 while ((skb = __skb_dequeue(&skqueue))) { 494 while ((skb = __skb_dequeue(&skqueue))) {
454 skb->dev = dev; 495 skb->dev = dev;
455 if (dev_queue_xmit(skb)) 496 if (dev_queue_xmit(skb))
@@ -623,8 +664,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
623 spin_lock(&priv->lock); 664 spin_lock(&priv->lock);
624 665
625 path = __path_find(dev, phdr->hwaddr + 4); 666 path = __path_find(dev, phdr->hwaddr + 4);
626 if (!path) { 667 if (!path || !path->valid) {
627 path = path_rec_create(dev, phdr->hwaddr + 4); 668 if (!path)
669 path = path_rec_create(dev, phdr->hwaddr + 4);
628 if (path) { 670 if (path) {
629 /* put pseudoheader back on for next time */ 671 /* put pseudoheader back on for next time */
630 skb_push(skb, sizeof *phdr); 672 skb_push(skb, sizeof *phdr);
@@ -938,6 +980,54 @@ static const struct header_ops ipoib_header_ops = {
938 .create = ipoib_hard_header, 980 .create = ipoib_hard_header,
939}; 981};
940 982
983static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
984 void **tcph, u64 *hdr_flags, void *priv)
985{
986 unsigned int ip_len;
987 struct iphdr *iph;
988
989 if (unlikely(skb->protocol != htons(ETH_P_IP)))
990 return -1;
991
992 /*
993 * In the future we may add an else clause that verifies the
994 * checksum and allows devices which do not calculate checksum
995 * to use LRO.
996 */
997 if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
998 return -1;
999
1000 /* Check for non-TCP packet */
1001 skb_reset_network_header(skb);
1002 iph = ip_hdr(skb);
1003 if (iph->protocol != IPPROTO_TCP)
1004 return -1;
1005
1006 ip_len = ip_hdrlen(skb);
1007 skb_set_transport_header(skb, ip_len);
1008 *tcph = tcp_hdr(skb);
1009
1010 /* check if IP header and TCP header are complete */
1011 if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
1012 return -1;
1013
1014 *hdr_flags = LRO_IPV4 | LRO_TCP;
1015 *iphdr = iph;
1016
1017 return 0;
1018}
1019
1020static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
1021{
1022 priv->lro.lro_mgr.max_aggr = lro_max_aggr;
1023 priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS;
1024 priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc;
1025 priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
1026 priv->lro.lro_mgr.features = LRO_F_NAPI;
1027 priv->lro.lro_mgr.dev = priv->dev;
1028 priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
1029}
1030
941static void ipoib_setup(struct net_device *dev) 1031static void ipoib_setup(struct net_device *dev)
942{ 1032{
943 struct ipoib_dev_priv *priv = netdev_priv(dev); 1033 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -977,10 +1067,11 @@ static void ipoib_setup(struct net_device *dev)
977 1067
978 priv->dev = dev; 1068 priv->dev = dev;
979 1069
1070 ipoib_lro_setup(priv);
1071
980 spin_lock_init(&priv->lock); 1072 spin_lock_init(&priv->lock);
981 spin_lock_init(&priv->tx_lock); 1073 spin_lock_init(&priv->tx_lock);
982 1074
983 mutex_init(&priv->mcast_mutex);
984 mutex_init(&priv->vlan_mutex); 1075 mutex_init(&priv->vlan_mutex);
985 1076
986 INIT_LIST_HEAD(&priv->path_list); 1077 INIT_LIST_HEAD(&priv->path_list);
@@ -989,9 +1080,10 @@ static void ipoib_setup(struct net_device *dev)
989 INIT_LIST_HEAD(&priv->multicast_list); 1080 INIT_LIST_HEAD(&priv->multicast_list);
990 1081
991 INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); 1082 INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
992 INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
993 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); 1083 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
994 INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); 1084 INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
1085 INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
1086 INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
995 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); 1087 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
996 INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); 1088 INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
997} 1089}
@@ -1154,6 +1246,9 @@ static struct net_device *ipoib_add_port(const char *format,
1154 priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; 1246 priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
1155 } 1247 }
1156 1248
1249 if (lro)
1250 priv->dev->features |= NETIF_F_LRO;
1251
1157 /* 1252 /*
1158 * Set the full membership bit, so that we join the right 1253 * Set the full membership bit, so that we join the right
1159 * broadcast group, etc. 1254 * broadcast group, etc.
@@ -1304,6 +1399,12 @@ static int __init ipoib_init_module(void)
1304 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); 1399 ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
1305#endif 1400#endif
1306 1401
1402 /*
1403 * When copying small received packets, we only copy from the
1404 * linear data part of the SKB, so we rely on this condition.
1405 */
1406 BUILD_BUG_ON(IPOIB_CM_COPYBREAK > IPOIB_CM_HEAD_SIZE);
1407
1307 ret = ipoib_register_debugfs(); 1408 ret = ipoib_register_debugfs();
1308 if (ret) 1409 if (ret)
1309 return ret; 1410 return ret;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 3f663fb852c1..1fcc9a898d81 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $
35 */ 33 */
36 34
37#include <linux/skbuff.h> 35#include <linux/skbuff.h>
@@ -188,6 +186,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
188 struct ipoib_dev_priv *priv = netdev_priv(dev); 186 struct ipoib_dev_priv *priv = netdev_priv(dev);
189 struct ipoib_ah *ah; 187 struct ipoib_ah *ah;
190 int ret; 188 int ret;
189 int set_qkey = 0;
191 190
192 mcast->mcmember = *mcmember; 191 mcast->mcmember = *mcmember;
193 192
@@ -202,6 +201,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
202 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 201 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
203 spin_unlock_irq(&priv->lock); 202 spin_unlock_irq(&priv->lock);
204 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 203 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
204 set_qkey = 1;
205 } 205 }
206 206
207 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 207 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -214,7 +214,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
214 } 214 }
215 215
216 ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), 216 ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
217 &mcast->mcmember.mgid); 217 &mcast->mcmember.mgid, set_qkey);
218 if (ret < 0) { 218 if (ret < 0) {
219 ipoib_warn(priv, "couldn't attach QP to multicast group " 219 ipoib_warn(priv, "couldn't attach QP to multicast group "
220 IPOIB_GID_FMT "\n", 220 IPOIB_GID_FMT "\n",
@@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
575 575
576 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); 576 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
577 577
578 if (!ipoib_cm_admin_enabled(dev)) 578 if (!ipoib_cm_admin_enabled(dev)) {
579 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); 579 rtnl_lock();
580 dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
581 rtnl_unlock();
582 }
580 583
581 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 584 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
582 585
@@ -594,10 +597,6 @@ int ipoib_mcast_start_thread(struct net_device *dev)
594 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); 597 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
595 mutex_unlock(&mcast_mutex); 598 mutex_unlock(&mcast_mutex);
596 599
597 spin_lock_irq(&priv->lock);
598 set_bit(IPOIB_MCAST_STARTED, &priv->flags);
599 spin_unlock_irq(&priv->lock);
600
601 return 0; 600 return 0;
602} 601}
603 602
@@ -607,10 +606,6 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
607 606
608 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 607 ipoib_dbg_mcast(priv, "stopping multicast thread\n");
609 608
610 spin_lock_irq(&priv->lock);
611 clear_bit(IPOIB_MCAST_STARTED, &priv->flags);
612 spin_unlock_irq(&priv->lock);
613
614 mutex_lock(&mcast_mutex); 609 mutex_lock(&mcast_mutex);
615 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 610 clear_bit(IPOIB_MCAST_RUN, &priv->flags);
616 cancel_delayed_work(&priv->mcast_task); 611 cancel_delayed_work(&priv->mcast_task);
@@ -635,10 +630,10 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
635 IPOIB_GID_ARG(mcast->mcmember.mgid)); 630 IPOIB_GID_ARG(mcast->mcmember.mgid));
636 631
637 /* Remove ourselves from the multicast group */ 632 /* Remove ourselves from the multicast group */
638 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), 633 ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
639 &mcast->mcmember.mgid); 634 be16_to_cpu(mcast->mcmember.mlid));
640 if (ret) 635 if (ret)
641 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); 636 ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
642 } 637 }
643 638
644 return 0; 639 return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 8766d29ce3b7..68325119f740 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -29,24 +29,17 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
34 */ 32 */
35 33
36#include "ipoib.h" 34#include "ipoib.h"
37 35
38int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) 36int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey)
39{ 37{
40 struct ipoib_dev_priv *priv = netdev_priv(dev); 38 struct ipoib_dev_priv *priv = netdev_priv(dev);
41 struct ib_qp_attr *qp_attr; 39 struct ib_qp_attr *qp_attr = NULL;
42 int ret; 40 int ret;
43 u16 pkey_index; 41 u16 pkey_index;
44 42
45 ret = -ENOMEM;
46 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
47 if (!qp_attr)
48 goto out;
49
50 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { 43 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
51 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 44 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
52 ret = -ENXIO; 45 ret = -ENXIO;
@@ -54,18 +47,23 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
54 } 47 }
55 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 48 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
56 49
57 /* set correct QKey for QP */ 50 if (set_qkey) {
58 qp_attr->qkey = priv->qkey; 51 ret = -ENOMEM;
59 ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY); 52 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
60 if (ret) { 53 if (!qp_attr)
61 ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret); 54 goto out;
62 goto out; 55
56 /* set correct QKey for QP */
57 qp_attr->qkey = priv->qkey;
58 ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
59 if (ret) {
60 ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
61 goto out;
62 }
63 } 63 }
64 64
65 /* attach QP to multicast group */ 65 /* attach QP to multicast group */
66 mutex_lock(&priv->mcast_mutex);
67 ret = ib_attach_mcast(priv->qp, mgid, mlid); 66 ret = ib_attach_mcast(priv->qp, mgid, mlid);
68 mutex_unlock(&priv->mcast_mutex);
69 if (ret) 67 if (ret)
70 ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret); 68 ipoib_warn(priv, "failed to attach to multicast group, ret = %d\n", ret);
71 69
@@ -74,20 +72,6 @@ out:
74 return ret; 72 return ret;
75} 73}
76 74
77int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
78{
79 struct ipoib_dev_priv *priv = netdev_priv(dev);
80 int ret;
81
82 mutex_lock(&priv->mcast_mutex);
83 ret = ib_detach_mcast(priv->qp, mgid, mlid);
84 mutex_unlock(&priv->mcast_mutex);
85 if (ret)
86 ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
87
88 return ret;
89}
90
91int ipoib_init_qp(struct net_device *dev) 75int ipoib_init_qp(struct net_device *dev)
92{ 76{
93 struct ipoib_dev_priv *priv = netdev_priv(dev); 77 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -201,7 +185,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
201 init_attr.recv_cq = priv->recv_cq; 185 init_attr.recv_cq = priv->recv_cq;
202 186
203 if (priv->hca_caps & IB_DEVICE_UD_TSO) 187 if (priv->hca_caps & IB_DEVICE_UD_TSO)
204 init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; 188 init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
189
190 if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
191 init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
205 192
206 if (dev->features & NETIF_F_SG) 193 if (dev->features & NETIF_F_SG)
207 init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; 194 init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
@@ -289,15 +276,17 @@ void ipoib_event(struct ib_event_handler *handler,
289 if (record->element.port_num != priv->port) 276 if (record->element.port_num != priv->port)
290 return; 277 return;
291 278
292 if (record->event == IB_EVENT_PORT_ERR || 279 ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
293 record->event == IB_EVENT_PORT_ACTIVE || 280 record->device->name, record->element.port_num);
294 record->event == IB_EVENT_LID_CHANGE || 281
295 record->event == IB_EVENT_SM_CHANGE || 282 if (record->event == IB_EVENT_SM_CHANGE ||
296 record->event == IB_EVENT_CLIENT_REREGISTER) { 283 record->event == IB_EVENT_CLIENT_REREGISTER) {
297 ipoib_dbg(priv, "Port state change event\n"); 284 queue_work(ipoib_workqueue, &priv->flush_light);
298 queue_work(ipoib_workqueue, &priv->flush_task); 285 } else if (record->event == IB_EVENT_PORT_ERR ||
286 record->event == IB_EVENT_PORT_ACTIVE ||
287 record->event == IB_EVENT_LID_CHANGE) {
288 queue_work(ipoib_workqueue, &priv->flush_normal);
299 } else if (record->event == IB_EVENT_PKEY_CHANGE) { 289 } else if (record->event == IB_EVENT_PKEY_CHANGE) {
300 ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port); 290 queue_work(ipoib_workqueue, &priv->flush_heavy);
301 queue_work(ipoib_workqueue, &priv->pkey_event_task);
302 } 291 }
303} 292}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 1cdb5cfb0ff1..b08eb56196d3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: ipoib_vlan.c 1349 2004-12-16 21:09:43Z roland $
33 */ 31 */
34 32
35#include <linux/module.h> 33#include <linux/module.h>
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index aeb58cae9a3f..356fac6d105a 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -42,9 +42,6 @@
42 * Zhenyu Wang 42 * Zhenyu Wang
43 * Modified by: 43 * Modified by:
44 * Erez Zilber 44 * Erez Zilber
45 *
46 *
47 * $Id: iscsi_iser.c 6965 2006-05-07 11:36:20Z ogerlitz $
48 */ 45 */
49 46
50#include <linux/types.h> 47#include <linux/types.h>
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index a8c1b300e34d..0e10703cf59e 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -36,8 +36,6 @@
36 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 36 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
37 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 37 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 * SOFTWARE. 38 * SOFTWARE.
39 *
40 * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $
41 */ 39 */
42#ifndef __ISCSI_ISER_H__ 40#ifndef __ISCSI_ISER_H__
43#define __ISCSI_ISER_H__ 41#define __ISCSI_ISER_H__
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 08dc81c46f41..31ad498bdc51 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $
33 */ 31 */
34#include <linux/kernel.h> 32#include <linux/kernel.h>
35#include <linux/slab.h> 33#include <linux/slab.h>
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index cac50c4dc159..81e49cb10ed3 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: iser_memory.c 6964 2006-05-07 11:11:43Z ogerlitz $
33 */ 31 */
34#include <linux/module.h> 32#include <linux/module.h>
35#include <linux/kernel.h> 33#include <linux/kernel.h>
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index d19cfe605ebb..77cabee7cc08 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $
34 */ 32 */
35#include <linux/kernel.h> 33#include <linux/kernel.h>
36#include <linux/module.h> 34#include <linux/module.h>
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 435145709dd6..ed7c5f72cb8b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: ib_srp.c 3932 2005-11-01 17:19:29Z roland $
33 */ 31 */
34 32
35#include <linux/module.h> 33#include <linux/module.h>
@@ -49,8 +47,6 @@
49#include <scsi/srp.h> 47#include <scsi/srp.h>
50#include <scsi/scsi_transport_srp.h> 48#include <scsi/scsi_transport_srp.h>
51 49
52#include <rdma/ib_cache.h>
53
54#include "ib_srp.h" 50#include "ib_srp.h"
55 51
56#define DRV_NAME "ib_srp" 52#define DRV_NAME "ib_srp"
@@ -183,10 +179,10 @@ static int srp_init_qp(struct srp_target_port *target,
183 if (!attr) 179 if (!attr)
184 return -ENOMEM; 180 return -ENOMEM;
185 181
186 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, 182 ret = ib_find_pkey(target->srp_host->srp_dev->dev,
187 target->srp_host->port, 183 target->srp_host->port,
188 be16_to_cpu(target->path.pkey), 184 be16_to_cpu(target->path.pkey),
189 &attr->pkey_index); 185 &attr->pkey_index);
190 if (ret) 186 if (ret)
191 goto out; 187 goto out;
192 188
@@ -1883,8 +1879,7 @@ static ssize_t srp_create_target(struct device *dev,
1883 if (ret) 1879 if (ret)
1884 goto err; 1880 goto err;
1885 1881
1886 ib_get_cached_gid(host->srp_dev->dev, host->port, 0, 1882 ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid);
1887 &target->path.sgid);
1888 1883
1889 shost_printk(KERN_DEBUG, target->scsi_host, PFX 1884 shost_printk(KERN_DEBUG, target->scsi_host, PFX
1890 "new target: id_ext %016llx ioc_guid %016llx pkey %04x " 1885 "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 63d2ae724061..e185b907fc12 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: ib_srp.h 3932 2005-11-01 17:19:29Z roland $
33 */ 31 */
34 32
35#ifndef IB_SRP_H 33#ifndef IB_SRP_H
diff --git a/drivers/net/cxgb3/cxgb3_ctl_defs.h b/drivers/net/cxgb3/cxgb3_ctl_defs.h
index 6c4f32066919..ed0ecd9679cb 100644
--- a/drivers/net/cxgb3/cxgb3_ctl_defs.h
+++ b/drivers/net/cxgb3/cxgb3_ctl_defs.h
@@ -54,6 +54,7 @@ enum {
54 RDMA_CQ_DISABLE = 16, 54 RDMA_CQ_DISABLE = 16,
55 RDMA_CTRL_QP_SETUP = 17, 55 RDMA_CTRL_QP_SETUP = 17,
56 RDMA_GET_MEM = 18, 56 RDMA_GET_MEM = 18,
57 RDMA_GET_MIB = 19,
57 58
58 GET_RX_PAGE_INFO = 50, 59 GET_RX_PAGE_INFO = 50,
59}; 60};
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index ff9c013ce535..cf2696873796 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -303,6 +303,12 @@ static int cxgb_rdma_ctl(struct adapter *adapter, unsigned int req, void *data)
303 spin_unlock_irq(&adapter->sge.reg_lock); 303 spin_unlock_irq(&adapter->sge.reg_lock);
304 break; 304 break;
305 } 305 }
306 case RDMA_GET_MIB: {
307 spin_lock(&adapter->stats_lock);
308 t3_tp_get_mib_stats(adapter, (struct tp_mib_stats *)data);
309 spin_unlock(&adapter->stats_lock);
310 break;
311 }
306 default: 312 default:
307 ret = -EOPNOTSUPP; 313 ret = -EOPNOTSUPP;
308 } 314 }
@@ -381,6 +387,7 @@ static int cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data)
381 case RDMA_CQ_DISABLE: 387 case RDMA_CQ_DISABLE:
382 case RDMA_CTRL_QP_SETUP: 388 case RDMA_CTRL_QP_SETUP:
383 case RDMA_GET_MEM: 389 case RDMA_GET_MEM:
390 case RDMA_GET_MIB:
384 if (!offload_running(adapter)) 391 if (!offload_running(adapter))
385 return -EAGAIN; 392 return -EAGAIN;
386 return cxgb_rdma_ctl(adapter, req, data); 393 return cxgb_rdma_ctl(adapter, req, data);
diff --git a/drivers/net/cxgb3/version.h b/drivers/net/cxgb3/version.h
index a0177fc55e28..29db711303b9 100644
--- a/drivers/net/cxgb3/version.h
+++ b/drivers/net/cxgb3/version.h
@@ -38,7 +38,7 @@
38#define DRV_VERSION "1.0-ko" 38#define DRV_VERSION "1.0-ko"
39 39
40/* Firmware version */ 40/* Firmware version */
41#define FW_VERSION_MAJOR 6 41#define FW_VERSION_MAJOR 7
42#define FW_VERSION_MINOR 0 42#define FW_VERSION_MINOR 0
43#define FW_VERSION_MICRO 0 43#define FW_VERSION_MICRO 0
44#endif /* __CHELSIO_VERSION_H */ 44#endif /* __CHELSIO_VERSION_H */
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index d82f2751d2c7..2b5006b9be67 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -101,6 +101,34 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
101 mlx4_dbg(dev, " %s\n", fname[i]); 101 mlx4_dbg(dev, " %s\n", fname[i]);
102} 102}
103 103
104int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
105{
106 struct mlx4_cmd_mailbox *mailbox;
107 u32 *inbox;
108 int err = 0;
109
110#define MOD_STAT_CFG_IN_SIZE 0x100
111
112#define MOD_STAT_CFG_PG_SZ_M_OFFSET 0x002
113#define MOD_STAT_CFG_PG_SZ_OFFSET 0x003
114
115 mailbox = mlx4_alloc_cmd_mailbox(dev);
116 if (IS_ERR(mailbox))
117 return PTR_ERR(mailbox);
118 inbox = mailbox->buf;
119
120 memset(inbox, 0, MOD_STAT_CFG_IN_SIZE);
121
122 MLX4_PUT(inbox, cfg->log_pg_sz, MOD_STAT_CFG_PG_SZ_OFFSET);
123 MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET);
124
125 err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG,
126 MLX4_CMD_TIME_CLASS_A);
127
128 mlx4_free_cmd_mailbox(dev, mailbox);
129 return err;
130}
131
104int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 132int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
105{ 133{
106 struct mlx4_cmd_mailbox *mailbox; 134 struct mlx4_cmd_mailbox *mailbox;
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index 306cb9b0242d..a0e046c149b7 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -38,6 +38,11 @@
38#include "mlx4.h" 38#include "mlx4.h"
39#include "icm.h" 39#include "icm.h"
40 40
41struct mlx4_mod_stat_cfg {
42 u8 log_pg_sz;
43 u8 log_pg_sz_m;
44};
45
41struct mlx4_dev_cap { 46struct mlx4_dev_cap {
42 int max_srq_sz; 47 int max_srq_sz;
43 int max_qp_sz; 48 int max_qp_sz;
@@ -162,5 +167,6 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
162int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); 167int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
163int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev); 168int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
164int mlx4_NOP(struct mlx4_dev *dev); 169int mlx4_NOP(struct mlx4_dev *dev);
170int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg);
165 171
166#endif /* MLX4_FW_H */ 172#endif /* MLX4_FW_H */
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index a6aa49fc1d68..d3736013fe9b 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -485,6 +485,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
485 struct mlx4_priv *priv = mlx4_priv(dev); 485 struct mlx4_priv *priv = mlx4_priv(dev);
486 struct mlx4_adapter adapter; 486 struct mlx4_adapter adapter;
487 struct mlx4_dev_cap dev_cap; 487 struct mlx4_dev_cap dev_cap;
488 struct mlx4_mod_stat_cfg mlx4_cfg;
488 struct mlx4_profile profile; 489 struct mlx4_profile profile;
489 struct mlx4_init_hca_param init_hca; 490 struct mlx4_init_hca_param init_hca;
490 u64 icm_size; 491 u64 icm_size;
@@ -502,6 +503,12 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
502 return err; 503 return err;
503 } 504 }
504 505
506 mlx4_cfg.log_pg_sz_m = 1;
507 mlx4_cfg.log_pg_sz = 0;
508 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
509 if (err)
510 mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
511
505 err = mlx4_dev_cap(dev, &dev_cap); 512 err = mlx4_dev_cap(dev, &dev_cap);
506 if (err) { 513 if (err) {
507 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 514 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
index 57f7f1f0d4ec..b4b57870ddfd 100644
--- a/drivers/net/mlx4/mcg.c
+++ b/drivers/net/mlx4/mcg.c
@@ -38,6 +38,9 @@
38 38
39#include "mlx4.h" 39#include "mlx4.h"
40 40
41#define MGM_QPN_MASK 0x00FFFFFF
42#define MGM_BLCK_LB_BIT 30
43
41struct mlx4_mgm { 44struct mlx4_mgm {
42 __be32 next_gid_index; 45 __be32 next_gid_index;
43 __be32 members_count; 46 __be32 members_count;
@@ -153,7 +156,8 @@ static int find_mgm(struct mlx4_dev *dev,
153 return err; 156 return err;
154} 157}
155 158
156int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) 159int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
160 int block_mcast_loopback)
157{ 161{
158 struct mlx4_priv *priv = mlx4_priv(dev); 162 struct mlx4_priv *priv = mlx4_priv(dev);
159 struct mlx4_cmd_mailbox *mailbox; 163 struct mlx4_cmd_mailbox *mailbox;
@@ -202,13 +206,18 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
202 } 206 }
203 207
204 for (i = 0; i < members_count; ++i) 208 for (i = 0; i < members_count; ++i)
205 if (mgm->qp[i] == cpu_to_be32(qp->qpn)) { 209 if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) {
206 mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn); 210 mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn);
207 err = 0; 211 err = 0;
208 goto out; 212 goto out;
209 } 213 }
210 214
211 mgm->qp[members_count++] = cpu_to_be32(qp->qpn); 215 if (block_mcast_loopback)
216 mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) |
217 (1 << MGM_BLCK_LB_BIT));
218 else
219 mgm->qp[members_count++] = cpu_to_be32(qp->qpn & MGM_QPN_MASK);
220
212 mgm->members_count = cpu_to_be32(members_count); 221 mgm->members_count = cpu_to_be32(members_count);
213 222
214 err = mlx4_WRITE_MCG(dev, index, mailbox); 223 err = mlx4_WRITE_MCG(dev, index, mailbox);
@@ -283,7 +292,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
283 292
284 members_count = be32_to_cpu(mgm->members_count); 293 members_count = be32_to_cpu(mgm->members_count);
285 for (loc = -1, i = 0; i < members_count; ++i) 294 for (loc = -1, i = 0; i < members_count; ++i)
286 if (mgm->qp[i] == cpu_to_be32(qp->qpn)) 295 if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn)
287 loc = i; 296 loc = i;
288 297
289 if (loc == -1) { 298 if (loc == -1) {
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 44d8d5163a1a..683bce375c74 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -60,6 +60,13 @@
60 60
61#define IDESCSI_DEBUG_LOG 0 61#define IDESCSI_DEBUG_LOG 0
62 62
63#if IDESCSI_DEBUG_LOG
64#define debug_log(fmt, args...) \
65 printk(KERN_INFO "ide-scsi: " fmt, ## args)
66#else
67#define debug_log(fmt, args...) do {} while (0)
68#endif
69
63/* 70/*
64 * SCSI command transformation layer 71 * SCSI command transformation layer
65 */ 72 */
@@ -129,14 +136,15 @@ static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive)
129#define IDESCSI_PC_RQ 90 136#define IDESCSI_PC_RQ 90
130 137
131/* 138/*
132 * PIO data transfer routines using the scatter gather table. 139 * PIO data transfer routine using the scatter gather table.
133 */ 140 */
134static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, 141static void ide_scsi_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
135 unsigned int bcount) 142 unsigned int bcount, int write)
136{ 143{
137 ide_hwif_t *hwif = drive->hwif; 144 ide_hwif_t *hwif = drive->hwif;
138 int count; 145 xfer_func_t *xf = write ? hwif->output_data : hwif->input_data;
139 char *buf; 146 char *buf;
147 int count;
140 148
141 while (bcount) { 149 while (bcount) {
142 count = min(pc->sg->length - pc->b_count, bcount); 150 count = min(pc->sg->length - pc->b_count, bcount);
@@ -145,13 +153,13 @@ static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
145 153
146 local_irq_save(flags); 154 local_irq_save(flags);
147 buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) + 155 buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) +
148 pc->sg->offset; 156 pc->sg->offset;
149 hwif->input_data(drive, NULL, buf + pc->b_count, count); 157 xf(drive, NULL, buf + pc->b_count, count);
150 kunmap_atomic(buf - pc->sg->offset, KM_IRQ0); 158 kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
151 local_irq_restore(flags); 159 local_irq_restore(flags);
152 } else { 160 } else {
153 buf = sg_virt(pc->sg); 161 buf = sg_virt(pc->sg);
154 hwif->input_data(drive, NULL, buf + pc->b_count, count); 162 xf(drive, NULL, buf + pc->b_count, count);
155 } 163 }
156 bcount -= count; pc->b_count += count; 164 bcount -= count; pc->b_count += count;
157 if (pc->b_count == pc->sg->length) { 165 if (pc->b_count == pc->sg->length) {
@@ -163,51 +171,34 @@ static void idescsi_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
163 } 171 }
164 172
165 if (bcount) { 173 if (bcount) {
166 printk (KERN_ERR "ide-scsi: scatter gather table too small, discarding data\n"); 174 printk(KERN_ERR "%s: scatter gather table too small, %s\n",
167 ide_pad_transfer(drive, 0, bcount); 175 drive->name, write ? "padding with zeros"
176 : "discarding data");
177 ide_pad_transfer(drive, write, bcount);
168 } 178 }
169} 179}
170 180
171static void idescsi_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, 181static void ide_scsi_hex_dump(u8 *data, int len)
172 unsigned int bcount)
173{ 182{
174 ide_hwif_t *hwif = drive->hwif; 183 print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0);
175 int count; 184}
176 char *buf;
177 185
178 while (bcount) { 186static int idescsi_end_request(ide_drive_t *, int, int);
179 count = min(pc->sg->length - pc->b_count, bcount);
180 if (PageHighMem(sg_page(pc->sg))) {
181 unsigned long flags;
182 187
183 local_irq_save(flags); 188static void ide_scsi_callback(ide_drive_t *drive)
184 buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) + 189{
185 pc->sg->offset; 190 idescsi_scsi_t *scsi = drive_to_idescsi(drive);
186 hwif->output_data(drive, NULL, buf + pc->b_count, count); 191 struct ide_atapi_pc *pc = scsi->pc;
187 kunmap_atomic(buf - pc->sg->offset, KM_IRQ0);
188 local_irq_restore(flags);
189 } else {
190 buf = sg_virt(pc->sg);
191 hwif->output_data(drive, NULL, buf + pc->b_count, count);
192 }
193 bcount -= count; pc->b_count += count;
194 if (pc->b_count == pc->sg->length) {
195 if (!--pc->sg_cnt)
196 break;
197 pc->sg = sg_next(pc->sg);
198 pc->b_count = 0;
199 }
200 }
201 192
202 if (bcount) { 193 if (pc->flags & PC_FLAG_TIMEDOUT)
203 printk (KERN_ERR "ide-scsi: scatter gather table too small, padding with zeros\n"); 194 debug_log("%s: got timed out packet %lu at %lu\n", __func__,
204 ide_pad_transfer(drive, 1, bcount); 195 pc->scsi_cmd->serial_number, jiffies);
205 } 196 /* end this request now - scsi should retry it*/
206} 197 else if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
198 printk(KERN_INFO "Packet command completed, %d bytes"
199 " transferred\n", pc->xferred);
207 200
208static void ide_scsi_hex_dump(u8 *data, int len) 201 idescsi_end_request(drive, 1, 0);
209{
210 print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0);
211} 202}
212 203
213static int idescsi_check_condition(ide_drive_t *drive, 204static int idescsi_check_condition(ide_drive_t *drive,
@@ -228,14 +219,16 @@ static int idescsi_check_condition(ide_drive_t *drive,
228 kfree(pc); 219 kfree(pc);
229 return -ENOMEM; 220 return -ENOMEM;
230 } 221 }
231 ide_init_drive_cmd(rq); 222 blk_rq_init(NULL, rq);
232 rq->special = (char *) pc; 223 rq->special = (char *) pc;
233 pc->rq = rq; 224 pc->rq = rq;
234 pc->buf = buf; 225 pc->buf = buf;
235 pc->c[0] = REQUEST_SENSE; 226 pc->c[0] = REQUEST_SENSE;
236 pc->c[4] = pc->req_xfer = pc->buf_size = SCSI_SENSE_BUFFERSIZE; 227 pc->c[4] = pc->req_xfer = pc->buf_size = SCSI_SENSE_BUFFERSIZE;
237 rq->cmd_type = REQ_TYPE_SENSE; 228 rq->cmd_type = REQ_TYPE_SENSE;
229 rq->cmd_flags |= REQ_PREEMPT;
238 pc->timeout = jiffies + WAIT_READY; 230 pc->timeout = jiffies + WAIT_READY;
231 pc->callback = ide_scsi_callback;
239 /* NOTE! Save the failed packet command in "rq->buffer" */ 232 /* NOTE! Save the failed packet command in "rq->buffer" */
240 rq->buffer = (void *) failed_cmd->special; 233 rq->buffer = (void *) failed_cmd->special;
241 pc->scsi_cmd = ((struct ide_atapi_pc *) failed_cmd->special)->scsi_cmd; 234 pc->scsi_cmd = ((struct ide_atapi_pc *) failed_cmd->special)->scsi_cmd;
@@ -244,11 +237,10 @@ static int idescsi_check_condition(ide_drive_t *drive,
244 ide_scsi_hex_dump(pc->c, 6); 237 ide_scsi_hex_dump(pc->c, 6);
245 } 238 }
246 rq->rq_disk = scsi->disk; 239 rq->rq_disk = scsi->disk;
247 return ide_do_drive_cmd(drive, rq, ide_preempt); 240 ide_do_drive_cmd(drive, rq);
241 return 0;
248} 242}
249 243
250static int idescsi_end_request(ide_drive_t *, int, int);
251
252static ide_startstop_t 244static ide_startstop_t
253idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) 245idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
254{ 246{
@@ -256,7 +248,7 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
256 248
257 if (ide_read_status(drive) & (BUSY_STAT | DRQ_STAT)) 249 if (ide_read_status(drive) & (BUSY_STAT | DRQ_STAT))
258 /* force an abort */ 250 /* force an abort */
259 hwif->OUTBSYNC(drive, WIN_IDLEIMMEDIATE, 251 hwif->OUTBSYNC(hwif, WIN_IDLEIMMEDIATE,
260 hwif->io_ports.command_addr); 252 hwif->io_ports.command_addr);
261 253
262 rq->errors++; 254 rq->errors++;
@@ -269,10 +261,9 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
269static ide_startstop_t 261static ide_startstop_t
270idescsi_atapi_abort(ide_drive_t *drive, struct request *rq) 262idescsi_atapi_abort(ide_drive_t *drive, struct request *rq)
271{ 263{
272#if IDESCSI_DEBUG_LOG 264 debug_log("%s called for %lu\n", __func__,
273 printk(KERN_WARNING "idescsi_atapi_abort called for %lu\n",
274 ((struct ide_atapi_pc *) rq->special)->scsi_cmd->serial_number); 265 ((struct ide_atapi_pc *) rq->special)->scsi_cmd->serial_number);
275#endif 266
276 rq->errors |= ERROR_MAX; 267 rq->errors |= ERROR_MAX;
277 268
278 idescsi_end_request(drive, 0, 0); 269 idescsi_end_request(drive, 0, 0);
@@ -351,9 +342,9 @@ static int idescsi_expiry(ide_drive_t *drive)
351 idescsi_scsi_t *scsi = drive_to_idescsi(drive); 342 idescsi_scsi_t *scsi = drive_to_idescsi(drive);
352 struct ide_atapi_pc *pc = scsi->pc; 343 struct ide_atapi_pc *pc = scsi->pc;
353 344
354#if IDESCSI_DEBUG_LOG 345 debug_log("%s called for %lu at %lu\n", __func__,
355 printk(KERN_WARNING "idescsi_expiry called for %lu at %lu\n", pc->scsi_cmd->serial_number, jiffies); 346 pc->scsi_cmd->serial_number, jiffies);
356#endif 347
357 pc->flags |= PC_FLAG_TIMEDOUT; 348 pc->flags |= PC_FLAG_TIMEDOUT;
358 349
359 return 0; /* we do not want the ide subsystem to retry */ 350 return 0; /* we do not want the ide subsystem to retry */
@@ -365,141 +356,19 @@ static int idescsi_expiry(ide_drive_t *drive)
365static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive) 356static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive)
366{ 357{
367 idescsi_scsi_t *scsi = drive_to_idescsi(drive); 358 idescsi_scsi_t *scsi = drive_to_idescsi(drive);
368 ide_hwif_t *hwif = drive->hwif;
369 struct ide_atapi_pc *pc = scsi->pc; 359 struct ide_atapi_pc *pc = scsi->pc;
370 struct request *rq = pc->rq;
371 unsigned int temp;
372 u16 bcount;
373 u8 stat, ireason;
374
375#if IDESCSI_DEBUG_LOG
376 printk (KERN_INFO "ide-scsi: Reached idescsi_pc_intr interrupt handler\n");
377#endif /* IDESCSI_DEBUG_LOG */
378
379 if (pc->flags & PC_FLAG_TIMEDOUT) {
380#if IDESCSI_DEBUG_LOG
381 printk(KERN_WARNING "idescsi_pc_intr: got timed out packet %lu at %lu\n",
382 pc->scsi_cmd->serial_number, jiffies);
383#endif
384 /* end this request now - scsi should retry it*/
385 idescsi_end_request (drive, 1, 0);
386 return ide_stopped;
387 }
388 if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
389 pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
390#if IDESCSI_DEBUG_LOG
391 printk ("ide-scsi: %s: DMA complete\n", drive->name);
392#endif /* IDESCSI_DEBUG_LOG */
393 pc->xferred = pc->req_xfer;
394 (void)hwif->dma_ops->dma_end(drive);
395 }
396
397 /* Clear the interrupt */
398 stat = ide_read_status(drive);
399
400 if ((stat & DRQ_STAT) == 0) {
401 /* No more interrupts */
402 if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
403 printk(KERN_INFO "Packet command completed, %d bytes"
404 " transferred\n", pc->xferred);
405 local_irq_enable_in_hardirq();
406 if (stat & ERR_STAT)
407 rq->errors++;
408 idescsi_end_request (drive, 1, 0);
409 return ide_stopped;
410 }
411 bcount = (hwif->INB(hwif->io_ports.lbah_addr) << 8) |
412 hwif->INB(hwif->io_ports.lbam_addr);
413 ireason = hwif->INB(hwif->io_ports.nsect_addr);
414
415 if (ireason & CD) {
416 printk(KERN_ERR "ide-scsi: CoD != 0 in idescsi_pc_intr\n");
417 return ide_do_reset (drive);
418 }
419 if (ireason & IO) {
420 temp = pc->xferred + bcount;
421 if (temp > pc->req_xfer) {
422 if (temp > pc->buf_size) {
423 printk(KERN_ERR "ide-scsi: The scsi wants to "
424 "send us more data than expected "
425 "- discarding data\n");
426 temp = pc->buf_size - pc->xferred;
427 if (temp) {
428 pc->flags &= ~PC_FLAG_WRITING;
429 if (pc->sg)
430 idescsi_input_buffers(drive, pc,
431 temp);
432 else
433 hwif->input_data(drive, NULL,
434 pc->cur_pos, temp);
435 printk(KERN_ERR "ide-scsi: transferred"
436 " %d of %d bytes\n",
437 temp, bcount);
438 }
439 pc->xferred += temp;
440 pc->cur_pos += temp;
441 ide_pad_transfer(drive, 0, bcount - temp);
442 ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
443 return ide_started;
444 }
445#if IDESCSI_DEBUG_LOG
446 printk (KERN_NOTICE "ide-scsi: The scsi wants to send us more data than expected - allowing transfer\n");
447#endif /* IDESCSI_DEBUG_LOG */
448 }
449 }
450 if (ireason & IO) {
451 pc->flags &= ~PC_FLAG_WRITING;
452 if (pc->sg)
453 idescsi_input_buffers(drive, pc, bcount);
454 else
455 hwif->input_data(drive, NULL, pc->cur_pos, bcount);
456 } else {
457 pc->flags |= PC_FLAG_WRITING;
458 if (pc->sg)
459 idescsi_output_buffers(drive, pc, bcount);
460 else
461 hwif->output_data(drive, NULL, pc->cur_pos, bcount);
462 }
463 /* Update the current position */
464 pc->xferred += bcount;
465 pc->cur_pos += bcount;
466 360
467 /* And set the interrupt handler again */ 361 return ide_pc_intr(drive, pc, idescsi_pc_intr, get_timeout(pc),
468 ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry); 362 idescsi_expiry, NULL, NULL, NULL,
469 return ide_started; 363 ide_scsi_io_buffers);
470} 364}
471 365
472static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive) 366static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive)
473{ 367{
474 ide_hwif_t *hwif = drive->hwif;
475 idescsi_scsi_t *scsi = drive_to_idescsi(drive); 368 idescsi_scsi_t *scsi = drive_to_idescsi(drive);
476 struct ide_atapi_pc *pc = scsi->pc;
477 ide_startstop_t startstop;
478 u8 ireason;
479 369
480 if (ide_wait_stat(&startstop,drive,DRQ_STAT,BUSY_STAT,WAIT_READY)) { 370 return ide_transfer_pc(drive, scsi->pc, idescsi_pc_intr,
481 printk(KERN_ERR "ide-scsi: Strange, packet command " 371 get_timeout(scsi->pc), idescsi_expiry);
482 "initiated yet DRQ isn't asserted\n");
483 return startstop;
484 }
485 ireason = hwif->INB(hwif->io_ports.nsect_addr);
486 if ((ireason & CD) == 0 || (ireason & IO)) {
487 printk(KERN_ERR "ide-scsi: (IO,CoD) != (0,1) while "
488 "issuing a packet command\n");
489 return ide_do_reset (drive);
490 }
491 BUG_ON(HWGROUP(drive)->handler != NULL);
492 /* Set the interrupt routine */
493 ide_set_handler(drive, &idescsi_pc_intr, get_timeout(pc), idescsi_expiry);
494
495 /* Send the actual packet */
496 hwif->output_data(drive, NULL, scsi->pc->c, 12);
497
498 if (pc->flags & PC_FLAG_DMA_OK) {
499 pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
500 hwif->dma_ops->dma_start(drive);
501 }
502 return ide_started;
503} 372}
504 373
505static inline int idescsi_set_direction(struct ide_atapi_pc *pc) 374static inline int idescsi_set_direction(struct ide_atapi_pc *pc)
@@ -545,38 +414,12 @@ static ide_startstop_t idescsi_issue_pc(ide_drive_t *drive,
545 struct ide_atapi_pc *pc) 414 struct ide_atapi_pc *pc)
546{ 415{
547 idescsi_scsi_t *scsi = drive_to_idescsi(drive); 416 idescsi_scsi_t *scsi = drive_to_idescsi(drive);
548 ide_hwif_t *hwif = drive->hwif;
549 u16 bcount;
550 u8 dma = 0;
551 417
552 /* Set the current packet command */ 418 /* Set the current packet command */
553 scsi->pc = pc; 419 scsi->pc = pc;
554 /* We haven't transferred any data yet */
555 pc->xferred = 0;
556 pc->cur_pos = pc->buf;
557 /* Request to transfer the entire buffer at once */
558 bcount = min(pc->req_xfer, 63 * 1024);
559
560 if (drive->using_dma && !idescsi_map_sg(drive, pc)) {
561 hwif->sg_mapped = 1;
562 dma = !hwif->dma_ops->dma_setup(drive);
563 hwif->sg_mapped = 0;
564 }
565
566 ide_pktcmd_tf_load(drive, IDE_TFLAG_NO_SELECT_MASK, bcount, dma);
567 420
568 if (dma) 421 return ide_issue_pc(drive, pc, idescsi_transfer_pc,
569 pc->flags |= PC_FLAG_DMA_OK; 422 get_timeout(pc), idescsi_expiry);
570
571 if (test_bit(IDESCSI_DRQ_INTERRUPT, &scsi->flags)) {
572 ide_execute_command(drive, WIN_PACKETCMD, &idescsi_transfer_pc,
573 get_timeout(pc), idescsi_expiry);
574 return ide_started;
575 } else {
576 /* Issue the packet command */
577 ide_execute_pkt_cmd(drive);
578 return idescsi_transfer_pc(drive);
579 }
580} 423}
581 424
582/* 425/*
@@ -584,14 +427,22 @@ static ide_startstop_t idescsi_issue_pc(ide_drive_t *drive,
584 */ 427 */
585static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block) 428static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
586{ 429{
587#if IDESCSI_DEBUG_LOG 430 debug_log("dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,
588 printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors); 431 rq->cmd[0], rq->errors);
589 printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors); 432 debug_log("sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",
590#endif /* IDESCSI_DEBUG_LOG */ 433 rq->sector, rq->nr_sectors, rq->current_nr_sectors);
591 434
592 if (blk_sense_request(rq) || blk_special_request(rq)) { 435 if (blk_sense_request(rq) || blk_special_request(rq)) {
593 return idescsi_issue_pc(drive, 436 struct ide_atapi_pc *pc = (struct ide_atapi_pc *)rq->special;
594 (struct ide_atapi_pc *) rq->special); 437 idescsi_scsi_t *scsi = drive_to_idescsi(drive);
438
439 if (test_bit(IDESCSI_DRQ_INTERRUPT, &scsi->flags))
440 pc->flags |= PC_FLAG_DRQ_INTERRUPT;
441
442 if (drive->using_dma && !idescsi_map_sg(drive, pc))
443 pc->flags |= PC_FLAG_DMA_OK;
444
445 return idescsi_issue_pc(drive, pc);
595 } 446 }
596 blk_dump_rq_flags(rq, "ide-scsi: unsup command"); 447 blk_dump_rq_flags(rq, "ide-scsi: unsup command");
597 idescsi_end_request (drive, 0, 0); 448 idescsi_end_request (drive, 0, 0);
@@ -646,6 +497,8 @@ static void ide_scsi_remove(ide_drive_t *drive)
646 put_disk(g); 497 put_disk(g);
647 498
648 ide_scsi_put(scsi); 499 ide_scsi_put(scsi);
500
501 drive->scsi = 0;
649} 502}
650 503
651static int ide_scsi_probe(ide_drive_t *); 504static int ide_scsi_probe(ide_drive_t *);
@@ -765,6 +618,8 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
765 618
766 memset (pc->c, 0, 12); 619 memset (pc->c, 0, 12);
767 pc->flags = 0; 620 pc->flags = 0;
621 if (cmd->sc_data_direction == DMA_TO_DEVICE)
622 pc->flags |= PC_FLAG_WRITING;
768 pc->rq = rq; 623 pc->rq = rq;
769 memcpy (pc->c, cmd->cmnd, cmd->cmd_len); 624 memcpy (pc->c, cmd->cmnd, cmd->cmd_len);
770 pc->buf = NULL; 625 pc->buf = NULL;
@@ -775,6 +630,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
775 pc->scsi_cmd = cmd; 630 pc->scsi_cmd = cmd;
776 pc->done = done; 631 pc->done = done;
777 pc->timeout = jiffies + cmd->timeout_per_command; 632 pc->timeout = jiffies + cmd->timeout_per_command;
633 pc->callback = ide_scsi_callback;
778 634
779 if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { 635 if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
780 printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number); 636 printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number);
@@ -785,12 +641,11 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
785 } 641 }
786 } 642 }
787 643
788 ide_init_drive_cmd (rq); 644 blk_rq_init(NULL, rq);
789 rq->special = (char *) pc; 645 rq->special = (char *) pc;
790 rq->cmd_type = REQ_TYPE_SPECIAL; 646 rq->cmd_type = REQ_TYPE_SPECIAL;
791 spin_unlock_irq(host->host_lock); 647 spin_unlock_irq(host->host_lock);
792 rq->rq_disk = scsi->disk; 648 blk_execute_rq_nowait(drive->queue, scsi->disk, rq, 0, NULL);
793 (void) ide_do_drive_cmd (drive, rq, ide_end);
794 spin_lock_irq(host->host_lock); 649 spin_lock_irq(host->host_lock);
795 return 0; 650 return 0;
796abort: 651abort:
@@ -985,6 +840,8 @@ static int ide_scsi_probe(ide_drive_t *drive)
985 !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t)))) 840 !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t))))
986 return -ENODEV; 841 return -ENODEV;
987 842
843 drive->scsi = 1;
844
988 g = alloc_disk(1 << PARTN_BITS); 845 g = alloc_disk(1 << PARTN_BITS);
989 if (!g) 846 if (!g)
990 goto out_host_put; 847 goto out_host_put;
@@ -993,10 +850,10 @@ static int ide_scsi_probe(ide_drive_t *drive)
993 850
994 host->max_id = 1; 851 host->max_id = 1;
995 852
996#if IDESCSI_DEBUG_LOG
997 if (drive->id->last_lun) 853 if (drive->id->last_lun)
998 printk(KERN_NOTICE "%s: id->last_lun=%u\n", drive->name, drive->id->last_lun); 854 debug_log("%s: id->last_lun=%u\n", drive->name,
999#endif 855 drive->id->last_lun);
856
1000 if ((drive->id->last_lun & 0x7) != 7) 857 if ((drive->id->last_lun & 0x7) != 7)
1001 host->max_lun = (drive->id->last_lun & 0x7) + 1; 858 host->max_lun = (drive->id->last_lun & 0x7) + 1;
1002 else 859 else
@@ -1025,6 +882,7 @@ static int ide_scsi_probe(ide_drive_t *drive)
1025 882
1026 put_disk(g); 883 put_disk(g);
1027out_host_put: 884out_host_put:
885 drive->scsi = 0;
1028 scsi_host_put(host); 886 scsi_host_put(host);
1029 return err; 887 return err;
1030} 888}
diff --git a/drivers/video/fb_ddc.c b/drivers/video/fb_ddc.c
index a0df63289b5f..0cf96eb8a60f 100644
--- a/drivers/video/fb_ddc.c
+++ b/drivers/video/fb_ddc.c
@@ -106,6 +106,7 @@ unsigned char *fb_ddc_read(struct i2c_adapter *adapter)
106 algo_data->setsda(algo_data->data, 1); 106 algo_data->setsda(algo_data->data, 1);
107 algo_data->setscl(algo_data->data, 1); 107 algo_data->setscl(algo_data->data, 1);
108 108
109 adapter->class |= I2C_CLASS_DDC;
109 return edid; 110 return edid;
110} 111}
111 112
diff --git a/drivers/video/intelfb/intelfb_i2c.c b/drivers/video/intelfb/intelfb_i2c.c
index ca95f09d8b43..fcf9fadbf572 100644
--- a/drivers/video/intelfb/intelfb_i2c.c
+++ b/drivers/video/intelfb/intelfb_i2c.c
@@ -100,7 +100,8 @@ static int intelfb_gpio_getsda(void *data)
100 100
101static int intelfb_setup_i2c_bus(struct intelfb_info *dinfo, 101static int intelfb_setup_i2c_bus(struct intelfb_info *dinfo,
102 struct intelfb_i2c_chan *chan, 102 struct intelfb_i2c_chan *chan,
103 const u32 reg, const char *name) 103 const u32 reg, const char *name,
104 int class)
104{ 105{
105 int rc; 106 int rc;
106 107
@@ -108,6 +109,7 @@ static int intelfb_setup_i2c_bus(struct intelfb_info *dinfo,
108 chan->reg = reg; 109 chan->reg = reg;
109 snprintf(chan->adapter.name, sizeof(chan->adapter.name), 110 snprintf(chan->adapter.name, sizeof(chan->adapter.name),
110 "intelfb %s", name); 111 "intelfb %s", name);
112 chan->adapter.class = class;
111 chan->adapter.owner = THIS_MODULE; 113 chan->adapter.owner = THIS_MODULE;
112 chan->adapter.id = I2C_HW_B_INTELFB; 114 chan->adapter.id = I2C_HW_B_INTELFB;
113 chan->adapter.algo_data = &chan->algo; 115 chan->adapter.algo_data = &chan->algo;
@@ -145,7 +147,7 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo)
145 147
146 /* setup the DDC bus for analog output */ 148 /* setup the DDC bus for analog output */
147 intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus, GPIOA, 149 intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus, GPIOA,
148 "CRTDDC_A"); 150 "CRTDDC_A", I2C_CLASS_DDC);
149 i++; 151 i++;
150 152
151 /* need to add the output busses for each device 153 /* need to add the output busses for each device
@@ -159,9 +161,9 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo)
159 case INTEL_865G: 161 case INTEL_865G:
160 dinfo->output[i].type = INTELFB_OUTPUT_DVO; 162 dinfo->output[i].type = INTELFB_OUTPUT_DVO;
161 intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus, 163 intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus,
162 GPIOD, "DVODDC_D"); 164 GPIOD, "DVODDC_D", I2C_CLASS_DDC);
163 intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus, 165 intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus,
164 GPIOE, "DVOI2C_E"); 166 GPIOE, "DVOI2C_E", 0);
165 i++; 167 i++;
166 break; 168 break;
167 case INTEL_915G: 169 case INTEL_915G:
@@ -174,7 +176,7 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo)
174 /* SDVO ports have a single control bus - 2 devices */ 176 /* SDVO ports have a single control bus - 2 devices */
175 dinfo->output[i].type = INTELFB_OUTPUT_SDVO; 177 dinfo->output[i].type = INTELFB_OUTPUT_SDVO;
176 intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus, 178 intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus,
177 GPIOE, "SDVOCTRL_E"); 179 GPIOE, "SDVOCTRL_E", 0);
178 /* TODO: initialize the SDVO */ 180 /* TODO: initialize the SDVO */
179 /* I830SDVOInit(pScrn, i, DVOB); */ 181 /* I830SDVOInit(pScrn, i, DVOB); */
180 i++; 182 i++;
diff --git a/drivers/video/matrox/i2c-matroxfb.c b/drivers/video/matrox/i2c-matroxfb.c
index 4baab7be58de..75ee5a12e549 100644
--- a/drivers/video/matrox/i2c-matroxfb.c
+++ b/drivers/video/matrox/i2c-matroxfb.c
@@ -104,7 +104,9 @@ static struct i2c_algo_bit_data matrox_i2c_algo_template =
104}; 104};
105 105
106static int i2c_bus_reg(struct i2c_bit_adapter* b, struct matrox_fb_info* minfo, 106static int i2c_bus_reg(struct i2c_bit_adapter* b, struct matrox_fb_info* minfo,
107 unsigned int data, unsigned int clock, const char* name) { 107 unsigned int data, unsigned int clock, const char *name,
108 int class)
109{
108 int err; 110 int err;
109 111
110 b->minfo = minfo; 112 b->minfo = minfo;
@@ -114,6 +116,7 @@ static int i2c_bus_reg(struct i2c_bit_adapter* b, struct matrox_fb_info* minfo,
114 snprintf(b->adapter.name, sizeof(b->adapter.name), name, 116 snprintf(b->adapter.name, sizeof(b->adapter.name), name,
115 minfo->fbcon.node); 117 minfo->fbcon.node);
116 i2c_set_adapdata(&b->adapter, b); 118 i2c_set_adapdata(&b->adapter, b);
119 b->adapter.class = class;
117 b->adapter.algo_data = &b->bac; 120 b->adapter.algo_data = &b->bac;
118 b->adapter.dev.parent = &ACCESS_FBINFO(pcidev)->dev; 121 b->adapter.dev.parent = &ACCESS_FBINFO(pcidev)->dev;
119 b->bac = matrox_i2c_algo_template; 122 b->bac = matrox_i2c_algo_template;
@@ -159,22 +162,29 @@ static void* i2c_matroxfb_probe(struct matrox_fb_info* minfo) {
159 switch (ACCESS_FBINFO(chip)) { 162 switch (ACCESS_FBINFO(chip)) {
160 case MGA_2064: 163 case MGA_2064:
161 case MGA_2164: 164 case MGA_2164:
162 err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1B_DATA, DDC1B_CLK, "DDC:fb%u #0"); 165 err = i2c_bus_reg(&m2info->ddc1, minfo,
166 DDC1B_DATA, DDC1B_CLK,
167 "DDC:fb%u #0", I2C_CLASS_DDC);
163 break; 168 break;
164 default: 169 default:
165 err = i2c_bus_reg(&m2info->ddc1, minfo, DDC1_DATA, DDC1_CLK, "DDC:fb%u #0"); 170 err = i2c_bus_reg(&m2info->ddc1, minfo,
171 DDC1_DATA, DDC1_CLK,
172 "DDC:fb%u #0", I2C_CLASS_DDC);
166 break; 173 break;
167 } 174 }
168 if (err) 175 if (err)
169 goto fail_ddc1; 176 goto fail_ddc1;
170 if (ACCESS_FBINFO(devflags.dualhead)) { 177 if (ACCESS_FBINFO(devflags.dualhead)) {
171 err = i2c_bus_reg(&m2info->ddc2, minfo, DDC2_DATA, DDC2_CLK, "DDC:fb%u #1"); 178 err = i2c_bus_reg(&m2info->ddc2, minfo,
179 DDC2_DATA, DDC2_CLK,
180 "DDC:fb%u #1", I2C_CLASS_DDC);
172 if (err == -ENODEV) { 181 if (err == -ENODEV) {
173 printk(KERN_INFO "i2c-matroxfb: VGA->TV plug detected, DDC unavailable.\n"); 182 printk(KERN_INFO "i2c-matroxfb: VGA->TV plug detected, DDC unavailable.\n");
174 } else if (err) 183 } else if (err)
175 printk(KERN_INFO "i2c-matroxfb: Could not register secondary output i2c bus. Continuing anyway.\n"); 184 printk(KERN_INFO "i2c-matroxfb: Could not register secondary output i2c bus. Continuing anyway.\n");
176 /* Register maven bus even on G450/G550 */ 185 /* Register maven bus even on G450/G550 */
177 err = i2c_bus_reg(&m2info->maven, minfo, MAT_DATA, MAT_CLK, "MAVEN:fb%u"); 186 err = i2c_bus_reg(&m2info->maven, minfo,
187 MAT_DATA, MAT_CLK, "MAVEN:fb%u", 0);
178 if (err) 188 if (err)
179 printk(KERN_INFO "i2c-matroxfb: Could not register Maven i2c bus. Continuing anyway.\n"); 189 printk(KERN_INFO "i2c-matroxfb: Could not register Maven i2c bus. Continuing anyway.\n");
180 } 190 }
diff --git a/fs/buffer.c b/fs/buffer.c
index 0f51c0f7c266..5fa1512cd9a2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1691,11 +1691,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1691 */ 1691 */
1692 clear_buffer_dirty(bh); 1692 clear_buffer_dirty(bh);
1693 set_buffer_uptodate(bh); 1693 set_buffer_uptodate(bh);
1694 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { 1694 } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
1695 buffer_dirty(bh)) {
1695 WARN_ON(bh->b_size != blocksize); 1696 WARN_ON(bh->b_size != blocksize);
1696 err = get_block(inode, block, bh, 1); 1697 err = get_block(inode, block, bh, 1);
1697 if (err) 1698 if (err)
1698 goto recover; 1699 goto recover;
1700 clear_buffer_delay(bh);
1699 if (buffer_new(bh)) { 1701 if (buffer_new(bh)) {
1700 /* blockdev mappings never come here */ 1702 /* blockdev mappings never come here */
1701 clear_buffer_new(bh); 1703 clear_buffer_new(bh);
@@ -1774,7 +1776,8 @@ recover:
1774 bh = head; 1776 bh = head;
1775 /* Recovery: lock and submit the mapped buffers */ 1777 /* Recovery: lock and submit the mapped buffers */
1776 do { 1778 do {
1777 if (buffer_mapped(bh) && buffer_dirty(bh)) { 1779 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1780 !buffer_delay(bh)) {
1778 lock_buffer(bh); 1781 lock_buffer(bh);
1779 mark_buffer_async_write(bh); 1782 mark_buffer_async_write(bh);
1780 } else { 1783 } else {
@@ -2061,6 +2064,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
2061 struct page *page, void *fsdata) 2064 struct page *page, void *fsdata)
2062{ 2065{
2063 struct inode *inode = mapping->host; 2066 struct inode *inode = mapping->host;
2067 int i_size_changed = 0;
2064 2068
2065 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 2069 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2066 2070
@@ -2073,12 +2077,21 @@ int generic_write_end(struct file *file, struct address_space *mapping,
2073 */ 2077 */
2074 if (pos+copied > inode->i_size) { 2078 if (pos+copied > inode->i_size) {
2075 i_size_write(inode, pos+copied); 2079 i_size_write(inode, pos+copied);
2076 mark_inode_dirty(inode); 2080 i_size_changed = 1;
2077 } 2081 }
2078 2082
2079 unlock_page(page); 2083 unlock_page(page);
2080 page_cache_release(page); 2084 page_cache_release(page);
2081 2085
2086 /*
2087 * Don't mark the inode dirty under page lock. First, it unnecessarily
2088 * makes the holding time of page lock longer. Second, it forces lock
2089 * ordering of page lock and transaction start for journaling
2090 * filesystems.
2091 */
2092 if (i_size_changed)
2093 mark_inode_dirty(inode);
2094
2082 return copied; 2095 return copied;
2083} 2096}
2084EXPORT_SYMBOL(generic_write_end); 2097EXPORT_SYMBOL(generic_write_end);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 9cc80b9cc8d8..495ab21b9832 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -47,7 +47,7 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
47 ext4_group_t block_group) 47 ext4_group_t block_group)
48{ 48{
49 ext4_group_t actual_group; 49 ext4_group_t actual_group;
50 ext4_get_group_no_and_offset(sb, block, &actual_group, 0); 50 ext4_get_group_no_and_offset(sb, block, &actual_group, NULL);
51 if (actual_group == block_group) 51 if (actual_group == block_group)
52 return 1; 52 return 1;
53 return 0; 53 return 0;
@@ -121,12 +121,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
121 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); 121 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
122 } 122 }
123 } else { /* For META_BG_BLOCK_GROUPS */ 123 } else { /* For META_BG_BLOCK_GROUPS */
124 int group_rel = (block_group - 124 bit_max += ext4_bg_num_gdb(sb, block_group);
125 le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
126 EXT4_DESC_PER_BLOCK(sb);
127 if (group_rel == 0 || group_rel == 1 ||
128 (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
129 bit_max += 1;
130 } 125 }
131 126
132 if (block_group == sbi->s_groups_count - 1) { 127 if (block_group == sbi->s_groups_count - 1) {
@@ -295,7 +290,7 @@ err_out:
295 return 0; 290 return 0;
296} 291}
297/** 292/**
298 * read_block_bitmap() 293 * ext4_read_block_bitmap()
299 * @sb: super block 294 * @sb: super block
300 * @block_group: given block group 295 * @block_group: given block group
301 * 296 *
@@ -305,7 +300,7 @@ err_out:
305 * Return buffer_head on success or NULL in case of failure. 300 * Return buffer_head on success or NULL in case of failure.
306 */ 301 */
307struct buffer_head * 302struct buffer_head *
308read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 303ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
309{ 304{
310 struct ext4_group_desc * desc; 305 struct ext4_group_desc * desc;
311 struct buffer_head * bh = NULL; 306 struct buffer_head * bh = NULL;
@@ -409,8 +404,7 @@ restart:
409 prev = rsv; 404 prev = rsv;
410 } 405 }
411 printk("Window map complete.\n"); 406 printk("Window map complete.\n");
412 if (bad) 407 BUG_ON(bad);
413 BUG();
414} 408}
415#define rsv_window_dump(root, verbose) \ 409#define rsv_window_dump(root, verbose) \
416 __rsv_window_dump((root), (verbose), __func__) 410 __rsv_window_dump((root), (verbose), __func__)
@@ -694,7 +688,7 @@ do_more:
694 count -= overflow; 688 count -= overflow;
695 } 689 }
696 brelse(bitmap_bh); 690 brelse(bitmap_bh);
697 bitmap_bh = read_block_bitmap(sb, block_group); 691 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
698 if (!bitmap_bh) 692 if (!bitmap_bh)
699 goto error_return; 693 goto error_return;
700 desc = ext4_get_group_desc (sb, block_group, &gd_bh); 694 desc = ext4_get_group_desc (sb, block_group, &gd_bh);
@@ -810,6 +804,13 @@ do_more:
810 spin_unlock(sb_bgl_lock(sbi, block_group)); 804 spin_unlock(sb_bgl_lock(sbi, block_group));
811 percpu_counter_add(&sbi->s_freeblocks_counter, count); 805 percpu_counter_add(&sbi->s_freeblocks_counter, count);
812 806
807 if (sbi->s_log_groups_per_flex) {
808 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
809 spin_lock(sb_bgl_lock(sbi, flex_group));
810 sbi->s_flex_groups[flex_group].free_blocks += count;
811 spin_unlock(sb_bgl_lock(sbi, flex_group));
812 }
813
813 /* We dirtied the bitmap block */ 814 /* We dirtied the bitmap block */
814 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 815 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
815 err = ext4_journal_dirty_metadata(handle, bitmap_bh); 816 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
@@ -1598,23 +1599,35 @@ out:
1598 1599
1599/** 1600/**
1600 * ext4_has_free_blocks() 1601 * ext4_has_free_blocks()
1601 * @sbi: in-core super block structure. 1602 * @sbi: in-core super block structure.
1603 * @nblocks: number of neeed blocks
1602 * 1604 *
1603 * Check if filesystem has at least 1 free block available for allocation. 1605 * Check if filesystem has free blocks available for allocation.
1606 * Return the number of blocks avaible for allocation for this request
1607 * On success, return nblocks
1604 */ 1608 */
1605static int ext4_has_free_blocks(struct ext4_sb_info *sbi) 1609ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
1610 ext4_fsblk_t nblocks)
1606{ 1611{
1607 ext4_fsblk_t free_blocks, root_blocks; 1612 ext4_fsblk_t free_blocks;
1613 ext4_fsblk_t root_blocks = 0;
1608 1614
1609 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1615 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1610 root_blocks = ext4_r_blocks_count(sbi->s_es); 1616
1611 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 1617 if (!capable(CAP_SYS_RESOURCE) &&
1612 sbi->s_resuid != current->fsuid && 1618 sbi->s_resuid != current->fsuid &&
1613 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 1619 (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
1614 return 0; 1620 root_blocks = ext4_r_blocks_count(sbi->s_es);
1615 } 1621#ifdef CONFIG_SMP
1616 return 1; 1622 if (free_blocks - root_blocks < FBC_BATCH)
1617} 1623 free_blocks =
1624 percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
1625#endif
1626 if (free_blocks - root_blocks < nblocks)
1627 return free_blocks - root_blocks;
1628 return nblocks;
1629 }
1630
1618 1631
1619/** 1632/**
1620 * ext4_should_retry_alloc() 1633 * ext4_should_retry_alloc()
@@ -1630,7 +1643,7 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi)
1630 */ 1643 */
1631int ext4_should_retry_alloc(struct super_block *sb, int *retries) 1644int ext4_should_retry_alloc(struct super_block *sb, int *retries)
1632{ 1645{
1633 if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3) 1646 if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)
1634 return 0; 1647 return 0;
1635 1648
1636 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 1649 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1639,20 +1652,24 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
1639} 1652}
1640 1653
1641/** 1654/**
1642 * ext4_new_blocks_old() -- core block(s) allocation function 1655 * ext4_old_new_blocks() -- core block bitmap based block allocation function
1656 *
1643 * @handle: handle to this transaction 1657 * @handle: handle to this transaction
1644 * @inode: file inode 1658 * @inode: file inode
1645 * @goal: given target block(filesystem wide) 1659 * @goal: given target block(filesystem wide)
1646 * @count: target number of blocks to allocate 1660 * @count: target number of blocks to allocate
1647 * @errp: error code 1661 * @errp: error code
1648 * 1662 *
1649 * ext4_new_blocks uses a goal block to assist allocation. It tries to 1663 * ext4_old_new_blocks uses a goal block to assist allocation and look up
1650 * allocate block(s) from the block group contains the goal block first. If that 1664 * the block bitmap directly to do block allocation. It tries to
1651 * fails, it will try to allocate block(s) from other block groups without 1665 * allocate block(s) from the block group contains the goal block first. If
1652 * any specific goal block. 1666 * that fails, it will try to allocate block(s) from other block groups
1667 * without any specific goal block.
1668 *
1669 * This function is called when -o nomballoc mount option is enabled
1653 * 1670 *
1654 */ 1671 */
1655ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, 1672ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
1656 ext4_fsblk_t goal, unsigned long *count, int *errp) 1673 ext4_fsblk_t goal, unsigned long *count, int *errp)
1657{ 1674{
1658 struct buffer_head *bitmap_bh = NULL; 1675 struct buffer_head *bitmap_bh = NULL;
@@ -1676,13 +1693,26 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
1676 ext4_group_t ngroups; 1693 ext4_group_t ngroups;
1677 unsigned long num = *count; 1694 unsigned long num = *count;
1678 1695
1679 *errp = -ENOSPC;
1680 sb = inode->i_sb; 1696 sb = inode->i_sb;
1681 if (!sb) { 1697 if (!sb) {
1698 *errp = -ENODEV;
1682 printk("ext4_new_block: nonexistent device"); 1699 printk("ext4_new_block: nonexistent device");
1683 return 0; 1700 return 0;
1684 } 1701 }
1685 1702
1703 sbi = EXT4_SB(sb);
1704 if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
1705 /*
1706 * With delalloc we already reserved the blocks
1707 */
1708 *count = ext4_has_free_blocks(sbi, *count);
1709 }
1710 if (*count == 0) {
1711 *errp = -ENOSPC;
1712 return 0; /*return with ENOSPC error */
1713 }
1714 num = *count;
1715
1686 /* 1716 /*
1687 * Check quota for allocation of this block. 1717 * Check quota for allocation of this block.
1688 */ 1718 */
@@ -1706,11 +1736,6 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
1706 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) 1736 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1707 my_rsv = &block_i->rsv_window_node; 1737 my_rsv = &block_i->rsv_window_node;
1708 1738
1709 if (!ext4_has_free_blocks(sbi)) {
1710 *errp = -ENOSPC;
1711 goto out;
1712 }
1713
1714 /* 1739 /*
1715 * First, test whether the goal block is free. 1740 * First, test whether the goal block is free.
1716 */ 1741 */
@@ -1734,7 +1759,7 @@ retry_alloc:
1734 my_rsv = NULL; 1759 my_rsv = NULL;
1735 1760
1736 if (free_blocks > 0) { 1761 if (free_blocks > 0) {
1737 bitmap_bh = read_block_bitmap(sb, group_no); 1762 bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1738 if (!bitmap_bh) 1763 if (!bitmap_bh)
1739 goto io_error; 1764 goto io_error;
1740 grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, 1765 grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
@@ -1770,7 +1795,7 @@ retry_alloc:
1770 continue; 1795 continue;
1771 1796
1772 brelse(bitmap_bh); 1797 brelse(bitmap_bh);
1773 bitmap_bh = read_block_bitmap(sb, group_no); 1798 bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1774 if (!bitmap_bh) 1799 if (!bitmap_bh)
1775 goto io_error; 1800 goto io_error;
1776 /* 1801 /*
@@ -1882,7 +1907,15 @@ allocated:
1882 le16_add_cpu(&gdp->bg_free_blocks_count, -num); 1907 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1883 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); 1908 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1884 spin_unlock(sb_bgl_lock(sbi, group_no)); 1909 spin_unlock(sb_bgl_lock(sbi, group_no));
1885 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1910 if (!EXT4_I(inode)->i_delalloc_reserved_flag)
1911 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1912
1913 if (sbi->s_log_groups_per_flex) {
1914 ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
1915 spin_lock(sb_bgl_lock(sbi, flex_group));
1916 sbi->s_flex_groups[flex_group].free_blocks -= num;
1917 spin_unlock(sb_bgl_lock(sbi, flex_group));
1918 }
1886 1919
1887 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); 1920 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
1888 err = ext4_journal_dirty_metadata(handle, gdp_bh); 1921 err = ext4_journal_dirty_metadata(handle, gdp_bh);
@@ -1915,46 +1948,104 @@ out:
1915 return 0; 1948 return 0;
1916} 1949}
1917 1950
1918ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, 1951#define EXT4_META_BLOCK 0x1
1919 ext4_fsblk_t goal, int *errp) 1952
1953static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
1954 ext4_lblk_t iblock, ext4_fsblk_t goal,
1955 unsigned long *count, int *errp, int flags)
1920{ 1956{
1921 struct ext4_allocation_request ar; 1957 struct ext4_allocation_request ar;
1922 ext4_fsblk_t ret; 1958 ext4_fsblk_t ret;
1923 1959
1924 if (!test_opt(inode->i_sb, MBALLOC)) { 1960 if (!test_opt(inode->i_sb, MBALLOC)) {
1925 unsigned long count = 1; 1961 return ext4_old_new_blocks(handle, inode, goal, count, errp);
1926 ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
1927 return ret;
1928 } 1962 }
1929 1963
1930 memset(&ar, 0, sizeof(ar)); 1964 memset(&ar, 0, sizeof(ar));
1965 /* Fill with neighbour allocated blocks */
1966
1931 ar.inode = inode; 1967 ar.inode = inode;
1932 ar.goal = goal; 1968 ar.goal = goal;
1933 ar.len = 1; 1969 ar.len = *count;
1970 ar.logical = iblock;
1971
1972 if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
1973 /* enable in-core preallocation for data block allocation */
1974 ar.flags = EXT4_MB_HINT_DATA;
1975 else
1976 /* disable in-core preallocation for non-regular files */
1977 ar.flags = 0;
1978
1934 ret = ext4_mb_new_blocks(handle, &ar, errp); 1979 ret = ext4_mb_new_blocks(handle, &ar, errp);
1980 *count = ar.len;
1935 return ret; 1981 return ret;
1936} 1982}
1937 1983
1938ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, 1984/*
1985 * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
1986 *
1987 * @handle: handle to this transaction
1988 * @inode: file inode
1989 * @goal: given target block(filesystem wide)
1990 * @count: total number of blocks need
1991 * @errp: error code
1992 *
1993 * Return 1st allocated block numberon success, *count stores total account
1994 * error stores in errp pointer
1995 */
1996ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1939 ext4_fsblk_t goal, unsigned long *count, int *errp) 1997 ext4_fsblk_t goal, unsigned long *count, int *errp)
1940{ 1998{
1941 struct ext4_allocation_request ar;
1942 ext4_fsblk_t ret; 1999 ext4_fsblk_t ret;
1943 2000 ret = do_blk_alloc(handle, inode, 0, goal,
1944 if (!test_opt(inode->i_sb, MBALLOC)) { 2001 count, errp, EXT4_META_BLOCK);
1945 ret = ext4_new_blocks_old(handle, inode, goal, count, errp); 2002 /*
1946 return ret; 2003 * Account for the allocated meta blocks
2004 */
2005 if (!(*errp)) {
2006 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2007 EXT4_I(inode)->i_allocated_meta_blocks += *count;
2008 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1947 } 2009 }
1948
1949 memset(&ar, 0, sizeof(ar));
1950 ar.inode = inode;
1951 ar.goal = goal;
1952 ar.len = *count;
1953 ret = ext4_mb_new_blocks(handle, &ar, errp);
1954 *count = ar.len;
1955 return ret; 2010 return ret;
1956} 2011}
1957 2012
2013/*
2014 * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
2015 *
2016 * @handle: handle to this transaction
2017 * @inode: file inode
2018 * @goal: given target block(filesystem wide)
2019 * @errp: error code
2020 *
2021 * Return allocated block number on success
2022 */
2023ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
2024 ext4_fsblk_t goal, int *errp)
2025{
2026 unsigned long count = 1;
2027 return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
2028}
2029
2030/*
2031 * ext4_new_blocks() -- allocate data blocks
2032 *
2033 * @handle: handle to this transaction
2034 * @inode: file inode
2035 * @goal: given target block(filesystem wide)
2036 * @count: total number of blocks need
2037 * @errp: error code
2038 *
2039 * Return 1st allocated block numberon success, *count stores total account
2040 * error stores in errp pointer
2041 */
2042
2043ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
2044 ext4_lblk_t iblock, ext4_fsblk_t goal,
2045 unsigned long *count, int *errp)
2046{
2047 return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0);
2048}
1958 2049
1959/** 2050/**
1960 * ext4_count_free_blocks() -- count filesystem free blocks 2051 * ext4_count_free_blocks() -- count filesystem free blocks
@@ -1986,7 +2077,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
1986 continue; 2077 continue;
1987 desc_count += le16_to_cpu(gdp->bg_free_blocks_count); 2078 desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
1988 brelse(bitmap_bh); 2079 brelse(bitmap_bh);
1989 bitmap_bh = read_block_bitmap(sb, i); 2080 bitmap_bh = ext4_read_block_bitmap(sb, i);
1990 if (bitmap_bh == NULL) 2081 if (bitmap_bh == NULL)
1991 continue; 2082 continue;
1992 2083
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2bf0331ea194..d3d23d73c08b 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -129,7 +129,8 @@ static int ext4_readdir(struct file * filp,
129 struct buffer_head *bh = NULL; 129 struct buffer_head *bh = NULL;
130 130
131 map_bh.b_state = 0; 131 map_bh.b_state = 0;
132 err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); 132 err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
133 0, 0, 0);
133 if (err > 0) { 134 if (err > 0) {
134 pgoff_t index = map_bh.b_blocknr >> 135 pgoff_t index = map_bh.b_blocknr >>
135 (PAGE_CACHE_SHIFT - inode->i_blkbits); 136 (PAGE_CACHE_SHIFT - inode->i_blkbits);
@@ -272,7 +273,7 @@ static void free_rb_tree_fname(struct rb_root *root)
272 273
273 while (n) { 274 while (n) {
274 /* Do the node's children first */ 275 /* Do the node's children first */
275 if ((n)->rb_left) { 276 if (n->rb_left) {
276 n = n->rb_left; 277 n = n->rb_left;
277 continue; 278 continue;
278 } 279 }
@@ -301,24 +302,18 @@ static void free_rb_tree_fname(struct rb_root *root)
301 parent->rb_right = NULL; 302 parent->rb_right = NULL;
302 n = parent; 303 n = parent;
303 } 304 }
304 root->rb_node = NULL;
305} 305}
306 306
307 307
308static struct dir_private_info *create_dir_info(loff_t pos) 308static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
309{ 309{
310 struct dir_private_info *p; 310 struct dir_private_info *p;
311 311
312 p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); 312 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
313 if (!p) 313 if (!p)
314 return NULL; 314 return NULL;
315 p->root.rb_node = NULL;
316 p->curr_node = NULL;
317 p->extra_fname = NULL;
318 p->last_pos = 0;
319 p->curr_hash = pos2maj_hash(pos); 315 p->curr_hash = pos2maj_hash(pos);
320 p->curr_minor_hash = pos2min_hash(pos); 316 p->curr_minor_hash = pos2min_hash(pos);
321 p->next_hash = 0;
322 return p; 317 return p;
323} 318}
324 319
@@ -433,7 +428,7 @@ static int ext4_dx_readdir(struct file * filp,
433 int ret; 428 int ret;
434 429
435 if (!info) { 430 if (!info) {
436 info = create_dir_info(filp->f_pos); 431 info = ext4_htree_create_dir_info(filp->f_pos);
437 if (!info) 432 if (!info)
438 return -ENOMEM; 433 return -ENOMEM;
439 filp->private_data = info; 434 filp->private_data = info;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8158083f7ac0..303e41cf7b14 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -22,7 +22,7 @@
22#include "ext4_i.h" 22#include "ext4_i.h"
23 23
24/* 24/*
25 * The second extended filesystem constants/structures 25 * The fourth extended filesystem constants/structures
26 */ 26 */
27 27
28/* 28/*
@@ -45,7 +45,7 @@
45#define ext4_debug(f, a...) \ 45#define ext4_debug(f, a...) \
46 do { \ 46 do { \
47 printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ 47 printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
48 __FILE__, __LINE__, __FUNCTION__); \ 48 __FILE__, __LINE__, __func__); \
49 printk (KERN_DEBUG f, ## a); \ 49 printk (KERN_DEBUG f, ## a); \
50 } while (0) 50 } while (0)
51#else 51#else
@@ -74,6 +74,9 @@
74#define EXT4_MB_HINT_GOAL_ONLY 256 74#define EXT4_MB_HINT_GOAL_ONLY 256
75/* goal is meaningful */ 75/* goal is meaningful */
76#define EXT4_MB_HINT_TRY_GOAL 512 76#define EXT4_MB_HINT_TRY_GOAL 512
77/* blocks already pre-reserved by delayed allocation */
78#define EXT4_MB_DELALLOC_RESERVED 1024
79
77 80
78struct ext4_allocation_request { 81struct ext4_allocation_request {
79 /* target inode for block we're allocating */ 82 /* target inode for block we're allocating */
@@ -170,6 +173,15 @@ struct ext4_group_desc
170 __u32 bg_reserved2[3]; 173 __u32 bg_reserved2[3];
171}; 174};
172 175
176/*
177 * Structure of a flex block group info
178 */
179
180struct flex_groups {
181 __u32 free_inodes;
182 __u32 free_blocks;
183};
184
173#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ 185#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
174#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ 186#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
175#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ 187#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
@@ -527,6 +539,7 @@ do { \
527#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 539#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
528#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 540#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
529#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ 541#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
542#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
530/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ 543/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
531#ifndef _LINUX_EXT2_FS_H 544#ifndef _LINUX_EXT2_FS_H
532#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 545#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
@@ -647,7 +660,10 @@ struct ext4_super_block {
647 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ 660 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
648 __le64 s_mmp_block; /* Block for multi-mount protection */ 661 __le64 s_mmp_block; /* Block for multi-mount protection */
649 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ 662 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
650 __u32 s_reserved[163]; /* Padding to the end of the block */ 663 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
664 __u8 s_reserved_char_pad2;
665 __le16 s_reserved_pad;
666 __u32 s_reserved[162]; /* Padding to the end of the block */
651}; 667};
652 668
653#ifdef __KERNEL__ 669#ifdef __KERNEL__
@@ -958,12 +974,17 @@ extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
958extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); 974extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
959extern unsigned long ext4_bg_num_gdb(struct super_block *sb, 975extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
960 ext4_group_t group); 976 ext4_group_t group);
961extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, 977extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
962 ext4_fsblk_t goal, int *errp); 978 ext4_fsblk_t goal, int *errp);
963extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, 979extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
964 ext4_fsblk_t goal, unsigned long *count, int *errp); 980 ext4_fsblk_t goal, unsigned long *count, int *errp);
965extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode, 981extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
982 ext4_lblk_t iblock, ext4_fsblk_t goal,
983 unsigned long *count, int *errp);
984extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
966 ext4_fsblk_t goal, unsigned long *count, int *errp); 985 ext4_fsblk_t goal, unsigned long *count, int *errp);
986extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
987 ext4_fsblk_t nblocks);
967extern void ext4_free_blocks (handle_t *handle, struct inode *inode, 988extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
968 ext4_fsblk_t block, unsigned long count, int metadata); 989 ext4_fsblk_t block, unsigned long count, int metadata);
969extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, 990extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
@@ -1016,9 +1037,14 @@ extern int __init init_ext4_mballoc(void);
1016extern void exit_ext4_mballoc(void); 1037extern void exit_ext4_mballoc(void);
1017extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1038extern void ext4_mb_free_blocks(handle_t *, struct inode *,
1018 unsigned long, unsigned long, int, unsigned long *); 1039 unsigned long, unsigned long, int, unsigned long *);
1040extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
1041 ext4_group_t i, struct ext4_group_desc *desc);
1042extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
1043 ext4_grpblk_t add);
1019 1044
1020 1045
1021/* inode.c */ 1046/* inode.c */
1047void ext4_da_release_space(struct inode *inode, int used, int to_free);
1022int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 1048int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1023 struct buffer_head *bh, ext4_fsblk_t blocknr); 1049 struct buffer_head *bh, ext4_fsblk_t blocknr);
1024struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1050struct buffer_head *ext4_getblk(handle_t *, struct inode *,
@@ -1033,19 +1059,23 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
1033extern struct inode *ext4_iget(struct super_block *, unsigned long); 1059extern struct inode *ext4_iget(struct super_block *, unsigned long);
1034extern int ext4_write_inode (struct inode *, int); 1060extern int ext4_write_inode (struct inode *, int);
1035extern int ext4_setattr (struct dentry *, struct iattr *); 1061extern int ext4_setattr (struct dentry *, struct iattr *);
1062extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
1063 struct kstat *stat);
1036extern void ext4_delete_inode (struct inode *); 1064extern void ext4_delete_inode (struct inode *);
1037extern int ext4_sync_inode (handle_t *, struct inode *); 1065extern int ext4_sync_inode (handle_t *, struct inode *);
1038extern void ext4_discard_reservation (struct inode *); 1066extern void ext4_discard_reservation (struct inode *);
1039extern void ext4_dirty_inode(struct inode *); 1067extern void ext4_dirty_inode(struct inode *);
1040extern int ext4_change_inode_journal_flag(struct inode *, int); 1068extern int ext4_change_inode_journal_flag(struct inode *, int);
1041extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); 1069extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
1070extern int ext4_can_truncate(struct inode *inode);
1042extern void ext4_truncate (struct inode *); 1071extern void ext4_truncate (struct inode *);
1043extern void ext4_set_inode_flags(struct inode *); 1072extern void ext4_set_inode_flags(struct inode *);
1044extern void ext4_get_inode_flags(struct ext4_inode_info *); 1073extern void ext4_get_inode_flags(struct ext4_inode_info *);
1045extern void ext4_set_aops(struct inode *inode); 1074extern void ext4_set_aops(struct inode *inode);
1046extern int ext4_writepage_trans_blocks(struct inode *); 1075extern int ext4_writepage_trans_blocks(struct inode *);
1047extern int ext4_block_truncate_page(handle_t *handle, struct page *page, 1076extern int ext4_block_truncate_page(handle_t *handle,
1048 struct address_space *mapping, loff_t from); 1077 struct address_space *mapping, loff_t from);
1078extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
1049 1079
1050/* ioctl.c */ 1080/* ioctl.c */
1051extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 1081extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -1159,10 +1189,21 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1159} 1189}
1160 1190
1161 1191
1192static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
1193 ext4_group_t block_group)
1194{
1195 return block_group >> sbi->s_log_groups_per_flex;
1196}
1197
1198static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
1199{
1200 return 1 << sbi->s_log_groups_per_flex;
1201}
1202
1162#define ext4_std_error(sb, errno) \ 1203#define ext4_std_error(sb, errno) \
1163do { \ 1204do { \
1164 if ((errno)) \ 1205 if ((errno)) \
1165 __ext4_std_error((sb), __FUNCTION__, (errno)); \ 1206 __ext4_std_error((sb), __func__, (errno)); \
1166} while (0) 1207} while (0)
1167 1208
1168/* 1209/*
@@ -1191,7 +1232,7 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1191 ext4_lblk_t iblock, 1232 ext4_lblk_t iblock,
1192 unsigned long max_blocks, struct buffer_head *bh_result, 1233 unsigned long max_blocks, struct buffer_head *bh_result,
1193 int create, int extend_disksize); 1234 int create, int extend_disksize);
1194extern void ext4_ext_truncate(struct inode *, struct page *); 1235extern void ext4_ext_truncate(struct inode *);
1195extern void ext4_ext_init(struct super_block *); 1236extern void ext4_ext_init(struct super_block *);
1196extern void ext4_ext_release(struct super_block *); 1237extern void ext4_ext_release(struct super_block *);
1197extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, 1238extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
@@ -1199,7 +1240,7 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1199extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, 1240extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
1200 sector_t block, unsigned long max_blocks, 1241 sector_t block, unsigned long max_blocks,
1201 struct buffer_head *bh, int create, 1242 struct buffer_head *bh, int create,
1202 int extend_disksize); 1243 int extend_disksize, int flag);
1203#endif /* __KERNEL__ */ 1244#endif /* __KERNEL__ */
1204 1245
1205#endif /* _EXT4_H */ 1246#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 75333b595fab..6c166c0a54b7 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -212,6 +212,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
212 (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); 212 (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
213} 213}
214 214
215extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
215extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 216extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
216extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 217extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
217extern int ext4_extent_tree_init(handle_t *, struct inode *); 218extern int ext4_extent_tree_init(handle_t *, struct inode *);
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index 26a4ae255d79..ef7409f0e7e4 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -79,7 +79,7 @@ struct ext4_ext_cache {
79}; 79};
80 80
81/* 81/*
82 * third extended file system inode data in memory 82 * fourth extended file system inode data in memory
83 */ 83 */
84struct ext4_inode_info { 84struct ext4_inode_info {
85 __le32 i_data[15]; /* unconverted */ 85 __le32 i_data[15]; /* unconverted */
@@ -150,6 +150,7 @@ struct ext4_inode_info {
150 */ 150 */
151 struct rw_semaphore i_data_sem; 151 struct rw_semaphore i_data_sem;
152 struct inode vfs_inode; 152 struct inode vfs_inode;
153 struct jbd2_inode jinode;
153 154
154 unsigned long i_ext_generation; 155 unsigned long i_ext_generation;
155 struct ext4_ext_cache i_cached_extent; 156 struct ext4_ext_cache i_cached_extent;
@@ -162,6 +163,13 @@ struct ext4_inode_info {
162 /* mballoc */ 163 /* mballoc */
163 struct list_head i_prealloc_list; 164 struct list_head i_prealloc_list;
164 spinlock_t i_prealloc_lock; 165 spinlock_t i_prealloc_lock;
166
167 /* allocation reservation info for delalloc */
168 unsigned long i_reserved_data_blocks;
169 unsigned long i_reserved_meta_blocks;
170 unsigned long i_allocated_meta_blocks;
171 unsigned short i_delalloc_reserved_flag;
172 spinlock_t i_block_reservation_lock;
165}; 173};
166 174
167#endif /* _EXT4_I */ 175#endif /* _EXT4_I */
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 9255a7d28b24..eb8bc3afe6e9 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -142,19 +142,17 @@ int __ext4_journal_dirty_metadata(const char *where,
142 handle_t *handle, struct buffer_head *bh); 142 handle_t *handle, struct buffer_head *bh);
143 143
144#define ext4_journal_get_undo_access(handle, bh) \ 144#define ext4_journal_get_undo_access(handle, bh) \
145 __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) 145 __ext4_journal_get_undo_access(__func__, (handle), (bh))
146#define ext4_journal_get_write_access(handle, bh) \ 146#define ext4_journal_get_write_access(handle, bh) \
147 __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) 147 __ext4_journal_get_write_access(__func__, (handle), (bh))
148#define ext4_journal_revoke(handle, blocknr, bh) \ 148#define ext4_journal_revoke(handle, blocknr, bh) \
149 __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) 149 __ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
150#define ext4_journal_get_create_access(handle, bh) \ 150#define ext4_journal_get_create_access(handle, bh) \
151 __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) 151 __ext4_journal_get_create_access(__func__, (handle), (bh))
152#define ext4_journal_dirty_metadata(handle, bh) \ 152#define ext4_journal_dirty_metadata(handle, bh) \
153 __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) 153 __ext4_journal_dirty_metadata(__func__, (handle), (bh))
154#define ext4_journal_forget(handle, bh) \ 154#define ext4_journal_forget(handle, bh) \
155 __ext4_journal_forget(__FUNCTION__, (handle), (bh)) 155 __ext4_journal_forget(__func__, (handle), (bh))
156
157int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
158 156
159handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); 157handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
160int __ext4_journal_stop(const char *where, handle_t *handle); 158int __ext4_journal_stop(const char *where, handle_t *handle);
@@ -165,7 +163,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
165} 163}
166 164
167#define ext4_journal_stop(handle) \ 165#define ext4_journal_stop(handle) \
168 __ext4_journal_stop(__FUNCTION__, (handle)) 166 __ext4_journal_stop(__func__, (handle))
169 167
170static inline handle_t *ext4_journal_current_handle(void) 168static inline handle_t *ext4_journal_current_handle(void)
171{ 169{
@@ -192,6 +190,11 @@ static inline int ext4_journal_force_commit(journal_t *journal)
192 return jbd2_journal_force_commit(journal); 190 return jbd2_journal_force_commit(journal);
193} 191}
194 192
193static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
194{
195 return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
196}
197
195/* super.c */ 198/* super.c */
196int ext4_force_commit(struct super_block *sb); 199int ext4_force_commit(struct super_block *sb);
197 200
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 5802e69f2191..6300226d5531 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -25,7 +25,7 @@
25#include <linux/rbtree.h> 25#include <linux/rbtree.h>
26 26
27/* 27/*
28 * third extended-fs super-block data in memory 28 * fourth extended-fs super-block data in memory
29 */ 29 */
30struct ext4_sb_info { 30struct ext4_sb_info {
31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */ 31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
@@ -143,6 +143,9 @@ struct ext4_sb_info {
143 143
144 /* locality groups */ 144 /* locality groups */
145 struct ext4_locality_group *s_locality_groups; 145 struct ext4_locality_group *s_locality_groups;
146
147 unsigned int s_log_groups_per_flex;
148 struct flex_groups *s_flex_groups;
146}; 149};
147 150
148#endif /* _EXT4_SB */ 151#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 47929c4e3dae..42c4c0c892ed 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -92,17 +92,16 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
92 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); 92 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
93} 93}
94 94
95static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) 95static int ext4_ext_journal_restart(handle_t *handle, int needed)
96{ 96{
97 int err; 97 int err;
98 98
99 if (handle->h_buffer_credits > needed) 99 if (handle->h_buffer_credits > needed)
100 return handle; 100 return 0;
101 if (!ext4_journal_extend(handle, needed)) 101 err = ext4_journal_extend(handle, needed);
102 return handle; 102 if (err)
103 err = ext4_journal_restart(handle, needed); 103 return err;
104 104 return ext4_journal_restart(handle, needed);
105 return handle;
106} 105}
107 106
108/* 107/*
@@ -180,15 +179,18 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
180 return bg_start + colour + block; 179 return bg_start + colour + block;
181} 180}
182 181
182/*
183 * Allocation for a meta data block
184 */
183static ext4_fsblk_t 185static ext4_fsblk_t
184ext4_ext_new_block(handle_t *handle, struct inode *inode, 186ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
185 struct ext4_ext_path *path, 187 struct ext4_ext_path *path,
186 struct ext4_extent *ex, int *err) 188 struct ext4_extent *ex, int *err)
187{ 189{
188 ext4_fsblk_t goal, newblock; 190 ext4_fsblk_t goal, newblock;
189 191
190 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); 192 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
191 newblock = ext4_new_block(handle, inode, goal, err); 193 newblock = ext4_new_meta_block(handle, inode, goal, err);
192 return newblock; 194 return newblock;
193} 195}
194 196
@@ -246,6 +248,36 @@ static int ext4_ext_space_root_idx(struct inode *inode)
246 return size; 248 return size;
247} 249}
248 250
251/*
252 * Calculate the number of metadata blocks needed
253 * to allocate @blocks
254 * Worse case is one block per extent
255 */
256int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
257{
258 int lcap, icap, rcap, leafs, idxs, num;
259 int newextents = blocks;
260
261 rcap = ext4_ext_space_root_idx(inode);
262 lcap = ext4_ext_space_block(inode);
263 icap = ext4_ext_space_block_idx(inode);
264
265 /* number of new leaf blocks needed */
266 num = leafs = (newextents + lcap - 1) / lcap;
267
268 /*
269 * Worse case, we need separate index block(s)
270 * to link all new leaf blocks
271 */
272 idxs = (leafs + icap - 1) / icap;
273 do {
274 num += idxs;
275 idxs = (idxs + icap - 1) / icap;
276 } while (idxs > rcap);
277
278 return num;
279}
280
249static int 281static int
250ext4_ext_max_entries(struct inode *inode, int depth) 282ext4_ext_max_entries(struct inode *inode, int depth)
251{ 283{
@@ -524,6 +556,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
524 alloc = 1; 556 alloc = 1;
525 } 557 }
526 path[0].p_hdr = eh; 558 path[0].p_hdr = eh;
559 path[0].p_bh = NULL;
527 560
528 i = depth; 561 i = depth;
529 /* walk through the tree */ 562 /* walk through the tree */
@@ -552,12 +585,14 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
552 } 585 }
553 586
554 path[ppos].p_depth = i; 587 path[ppos].p_depth = i;
555 path[ppos].p_hdr = eh;
556 path[ppos].p_ext = NULL; 588 path[ppos].p_ext = NULL;
557 path[ppos].p_idx = NULL; 589 path[ppos].p_idx = NULL;
558 590
559 /* find extent */ 591 /* find extent */
560 ext4_ext_binsearch(inode, path + ppos, block); 592 ext4_ext_binsearch(inode, path + ppos, block);
593 /* if not an empty leaf */
594 if (path[ppos].p_ext)
595 path[ppos].p_block = ext_pblock(path[ppos].p_ext);
561 596
562 ext4_ext_show_path(inode, path); 597 ext4_ext_show_path(inode, path);
563 598
@@ -688,7 +723,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
688 /* allocate all needed blocks */ 723 /* allocate all needed blocks */
689 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); 724 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
690 for (a = 0; a < depth - at; a++) { 725 for (a = 0; a < depth - at; a++) {
691 newblock = ext4_ext_new_block(handle, inode, path, newext, &err); 726 newblock = ext4_ext_new_meta_block(handle, inode, path,
727 newext, &err);
692 if (newblock == 0) 728 if (newblock == 0)
693 goto cleanup; 729 goto cleanup;
694 ablocks[a] = newblock; 730 ablocks[a] = newblock;
@@ -884,7 +920,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
884 ext4_fsblk_t newblock; 920 ext4_fsblk_t newblock;
885 int err = 0; 921 int err = 0;
886 922
887 newblock = ext4_ext_new_block(handle, inode, path, newext, &err); 923 newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
888 if (newblock == 0) 924 if (newblock == 0)
889 return err; 925 return err;
890 926
@@ -981,6 +1017,8 @@ repeat:
981 /* if we found index with free entry, then use that 1017 /* if we found index with free entry, then use that
982 * entry: create all needed subtree and add new leaf */ 1018 * entry: create all needed subtree and add new leaf */
983 err = ext4_ext_split(handle, inode, path, newext, i); 1019 err = ext4_ext_split(handle, inode, path, newext, i);
1020 if (err)
1021 goto out;
984 1022
985 /* refill path */ 1023 /* refill path */
986 ext4_ext_drop_refs(path); 1024 ext4_ext_drop_refs(path);
@@ -1883,11 +1921,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1883 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 1921 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
1884#endif 1922#endif
1885 1923
1886 handle = ext4_ext_journal_restart(handle, credits); 1924 err = ext4_ext_journal_restart(handle, credits);
1887 if (IS_ERR(handle)) { 1925 if (err)
1888 err = PTR_ERR(handle);
1889 goto out; 1926 goto out;
1890 }
1891 1927
1892 err = ext4_ext_get_access(handle, inode, path + depth); 1928 err = ext4_ext_get_access(handle, inode, path + depth);
1893 if (err) 1929 if (err)
@@ -2529,6 +2565,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2529 int err = 0, depth, ret; 2565 int err = 0, depth, ret;
2530 unsigned long allocated = 0; 2566 unsigned long allocated = 0;
2531 struct ext4_allocation_request ar; 2567 struct ext4_allocation_request ar;
2568 loff_t disksize;
2532 2569
2533 __clear_bit(BH_New, &bh_result->b_state); 2570 __clear_bit(BH_New, &bh_result->b_state);
2534 ext_debug("blocks %u/%lu requested for inode %u\n", 2571 ext_debug("blocks %u/%lu requested for inode %u\n",
@@ -2616,8 +2653,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2616 */ 2653 */
2617 if (allocated > max_blocks) 2654 if (allocated > max_blocks)
2618 allocated = max_blocks; 2655 allocated = max_blocks;
2619 /* mark the buffer unwritten */ 2656 set_buffer_unwritten(bh_result);
2620 __set_bit(BH_Unwritten, &bh_result->b_state);
2621 goto out2; 2657 goto out2;
2622 } 2658 }
2623 2659
@@ -2716,14 +2752,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2716 goto out2; 2752 goto out2;
2717 } 2753 }
2718 2754
2719 if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
2720 EXT4_I(inode)->i_disksize = inode->i_size;
2721
2722 /* previous routine could use block we allocated */ 2755 /* previous routine could use block we allocated */
2723 newblock = ext_pblock(&newex); 2756 newblock = ext_pblock(&newex);
2724 allocated = ext4_ext_get_actual_len(&newex); 2757 allocated = ext4_ext_get_actual_len(&newex);
2725outnew: 2758outnew:
2726 __set_bit(BH_New, &bh_result->b_state); 2759 if (extend_disksize) {
2760 disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
2761 if (disksize > i_size_read(inode))
2762 disksize = i_size_read(inode);
2763 if (disksize > EXT4_I(inode)->i_disksize)
2764 EXT4_I(inode)->i_disksize = disksize;
2765 }
2766
2767 set_buffer_new(bh_result);
2727 2768
2728 /* Cache only when it is _not_ an uninitialized extent */ 2769 /* Cache only when it is _not_ an uninitialized extent */
2729 if (create != EXT4_CREATE_UNINITIALIZED_EXT) 2770 if (create != EXT4_CREATE_UNINITIALIZED_EXT)
@@ -2733,7 +2774,7 @@ out:
2733 if (allocated > max_blocks) 2774 if (allocated > max_blocks)
2734 allocated = max_blocks; 2775 allocated = max_blocks;
2735 ext4_ext_show_leaf(inode, path); 2776 ext4_ext_show_leaf(inode, path);
2736 __set_bit(BH_Mapped, &bh_result->b_state); 2777 set_buffer_mapped(bh_result);
2737 bh_result->b_bdev = inode->i_sb->s_bdev; 2778 bh_result->b_bdev = inode->i_sb->s_bdev;
2738 bh_result->b_blocknr = newblock; 2779 bh_result->b_blocknr = newblock;
2739out2: 2780out2:
@@ -2744,7 +2785,7 @@ out2:
2744 return err ? err : allocated; 2785 return err ? err : allocated;
2745} 2786}
2746 2787
2747void ext4_ext_truncate(struct inode * inode, struct page *page) 2788void ext4_ext_truncate(struct inode *inode)
2748{ 2789{
2749 struct address_space *mapping = inode->i_mapping; 2790 struct address_space *mapping = inode->i_mapping;
2750 struct super_block *sb = inode->i_sb; 2791 struct super_block *sb = inode->i_sb;
@@ -2757,18 +2798,14 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
2757 */ 2798 */
2758 err = ext4_writepage_trans_blocks(inode) + 3; 2799 err = ext4_writepage_trans_blocks(inode) + 3;
2759 handle = ext4_journal_start(inode, err); 2800 handle = ext4_journal_start(inode, err);
2760 if (IS_ERR(handle)) { 2801 if (IS_ERR(handle))
2761 if (page) {
2762 clear_highpage(page);
2763 flush_dcache_page(page);
2764 unlock_page(page);
2765 page_cache_release(page);
2766 }
2767 return; 2802 return;
2768 }
2769 2803
2770 if (page) 2804 if (inode->i_size & (sb->s_blocksize - 1))
2771 ext4_block_truncate_page(handle, page, mapping, inode->i_size); 2805 ext4_block_truncate_page(handle, mapping, inode->i_size);
2806
2807 if (ext4_orphan_add(handle, inode))
2808 goto out_stop;
2772 2809
2773 down_write(&EXT4_I(inode)->i_data_sem); 2810 down_write(&EXT4_I(inode)->i_data_sem);
2774 ext4_ext_invalidate_cache(inode); 2811 ext4_ext_invalidate_cache(inode);
@@ -2780,8 +2817,6 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
2780 * Probably we need not scan at all, 2817 * Probably we need not scan at all,
2781 * because page truncation is enough. 2818 * because page truncation is enough.
2782 */ 2819 */
2783 if (ext4_orphan_add(handle, inode))
2784 goto out_stop;
2785 2820
2786 /* we have to know where to truncate from in crash case */ 2821 /* we have to know where to truncate from in crash case */
2787 EXT4_I(inode)->i_disksize = inode->i_size; 2822 EXT4_I(inode)->i_disksize = inode->i_size;
@@ -2798,6 +2833,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
2798 handle->h_sync = 1; 2833 handle->h_sync = 1;
2799 2834
2800out_stop: 2835out_stop:
2836 up_write(&EXT4_I(inode)->i_data_sem);
2801 /* 2837 /*
2802 * If this was a simple ftruncate() and the file will remain alive, 2838 * If this was a simple ftruncate() and the file will remain alive,
2803 * then we need to clear up the orphan record which we created above. 2839 * then we need to clear up the orphan record which we created above.
@@ -2808,7 +2844,6 @@ out_stop:
2808 if (inode->i_nlink) 2844 if (inode->i_nlink)
2809 ext4_orphan_del(handle, inode); 2845 ext4_orphan_del(handle, inode);
2810 2846
2811 up_write(&EXT4_I(inode)->i_data_sem);
2812 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 2847 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2813 ext4_mark_inode_dirty(handle, inode); 2848 ext4_mark_inode_dirty(handle, inode);
2814 ext4_journal_stop(handle); 2849 ext4_journal_stop(handle);
@@ -2911,7 +2946,7 @@ retry:
2911 } 2946 }
2912 ret = ext4_get_blocks_wrap(handle, inode, block, 2947 ret = ext4_get_blocks_wrap(handle, inode, block,
2913 max_blocks, &map_bh, 2948 max_blocks, &map_bh,
2914 EXT4_CREATE_UNINITIALIZED_EXT, 0); 2949 EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
2915 if (ret <= 0) { 2950 if (ret <= 0) {
2916#ifdef EXT4FS_DEBUG 2951#ifdef EXT4FS_DEBUG
2917 WARN_ON(ret <= 0); 2952 WARN_ON(ret <= 0);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4159be6366ab..430eb7978db4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -123,6 +123,23 @@ force_commit:
123 return ret; 123 return ret;
124} 124}
125 125
126static struct vm_operations_struct ext4_file_vm_ops = {
127 .fault = filemap_fault,
128 .page_mkwrite = ext4_page_mkwrite,
129};
130
131static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
132{
133 struct address_space *mapping = file->f_mapping;
134
135 if (!mapping->a_ops->readpage)
136 return -ENOEXEC;
137 file_accessed(file);
138 vma->vm_ops = &ext4_file_vm_ops;
139 vma->vm_flags |= VM_CAN_NONLINEAR;
140 return 0;
141}
142
126const struct file_operations ext4_file_operations = { 143const struct file_operations ext4_file_operations = {
127 .llseek = generic_file_llseek, 144 .llseek = generic_file_llseek,
128 .read = do_sync_read, 145 .read = do_sync_read,
@@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = {
133#ifdef CONFIG_COMPAT 150#ifdef CONFIG_COMPAT
134 .compat_ioctl = ext4_compat_ioctl, 151 .compat_ioctl = ext4_compat_ioctl,
135#endif 152#endif
136 .mmap = generic_file_mmap, 153 .mmap = ext4_file_mmap,
137 .open = generic_file_open, 154 .open = generic_file_open,
138 .release = ext4_release_file, 155 .release = ext4_release_file,
139 .fsync = ext4_sync_file, 156 .fsync = ext4_sync_file,
@@ -144,6 +161,7 @@ const struct file_operations ext4_file_operations = {
144const struct inode_operations ext4_file_inode_operations = { 161const struct inode_operations ext4_file_inode_operations = {
145 .truncate = ext4_truncate, 162 .truncate = ext4_truncate,
146 .setattr = ext4_setattr, 163 .setattr = ext4_setattr,
164 .getattr = ext4_getattr,
147#ifdef CONFIG_EXT4DEV_FS_XATTR 165#ifdef CONFIG_EXT4DEV_FS_XATTR
148 .setxattr = generic_setxattr, 166 .setxattr = generic_setxattr,
149 .getxattr = generic_getxattr, 167 .getxattr = generic_getxattr,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 1c8ba48d4f8d..a45c3737ad31 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -27,6 +27,7 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/blkdev.h>
30#include "ext4.h" 31#include "ext4.h"
31#include "ext4_jbd2.h" 32#include "ext4_jbd2.h"
32 33
@@ -45,6 +46,7 @@
45int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) 46int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
46{ 47{
47 struct inode *inode = dentry->d_inode; 48 struct inode *inode = dentry->d_inode;
49 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
48 int ret = 0; 50 int ret = 0;
49 51
50 J_ASSERT(ext4_journal_current_handle() == NULL); 52 J_ASSERT(ext4_journal_current_handle() == NULL);
@@ -85,6 +87,8 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
85 .nr_to_write = 0, /* sys_fsync did this */ 87 .nr_to_write = 0, /* sys_fsync did this */
86 }; 88 };
87 ret = sync_inode(inode, &wbc); 89 ret = sync_inode(inode, &wbc);
90 if (journal && (journal->j_flags & JBD2_BARRIER))
91 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
88 } 92 }
89out: 93out:
90 return ret; 94 return ret;
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
index 7eb0604e7eea..c2c0a8d06d0e 100644
--- a/fs/ext4/group.h
+++ b/fs/ext4/group.h
@@ -13,7 +13,7 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
13 struct ext4_group_desc *gdp); 13 struct ext4_group_desc *gdp);
14extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, 14extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
15 struct ext4_group_desc *gdp); 15 struct ext4_group_desc *gdp);
16struct buffer_head *read_block_bitmap(struct super_block *sb, 16struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
17 ext4_group_t block_group); 17 ext4_group_t block_group);
18extern unsigned ext4_init_block_bitmap(struct super_block *sb, 18extern unsigned ext4_init_block_bitmap(struct super_block *sb,
19 struct buffer_head *bh, 19 struct buffer_head *bh,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index c6efbab0c801..a92eb305344f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -157,6 +157,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
157 struct ext4_super_block * es; 157 struct ext4_super_block * es;
158 struct ext4_sb_info *sbi; 158 struct ext4_sb_info *sbi;
159 int fatal = 0, err; 159 int fatal = 0, err;
160 ext4_group_t flex_group;
160 161
161 if (atomic_read(&inode->i_count) > 1) { 162 if (atomic_read(&inode->i_count) > 1) {
162 printk ("ext4_free_inode: inode has count=%d\n", 163 printk ("ext4_free_inode: inode has count=%d\n",
@@ -232,6 +233,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
232 if (is_directory) 233 if (is_directory)
233 percpu_counter_dec(&sbi->s_dirs_counter); 234 percpu_counter_dec(&sbi->s_dirs_counter);
234 235
236 if (sbi->s_log_groups_per_flex) {
237 flex_group = ext4_flex_group(sbi, block_group);
238 spin_lock(sb_bgl_lock(sbi, flex_group));
239 sbi->s_flex_groups[flex_group].free_inodes++;
240 spin_unlock(sb_bgl_lock(sbi, flex_group));
241 }
235 } 242 }
236 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); 243 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
237 err = ext4_journal_dirty_metadata(handle, bh2); 244 err = ext4_journal_dirty_metadata(handle, bh2);
@@ -286,6 +293,80 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
286 return ret; 293 return ret;
287} 294}
288 295
296#define free_block_ratio 10
297
298static int find_group_flex(struct super_block *sb, struct inode *parent,
299 ext4_group_t *best_group)
300{
301 struct ext4_sb_info *sbi = EXT4_SB(sb);
302 struct ext4_group_desc *desc;
303 struct buffer_head *bh;
304 struct flex_groups *flex_group = sbi->s_flex_groups;
305 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
306 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
307 ext4_group_t ngroups = sbi->s_groups_count;
308 int flex_size = ext4_flex_bg_size(sbi);
309 ext4_group_t best_flex = parent_fbg_group;
310 int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
311 int flexbg_free_blocks;
312 int flex_freeb_ratio;
313 ext4_group_t n_fbg_groups;
314 ext4_group_t i;
315
316 n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
317 sbi->s_log_groups_per_flex;
318
319find_close_to_parent:
320 flexbg_free_blocks = flex_group[best_flex].free_blocks;
321 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
322 if (flex_group[best_flex].free_inodes &&
323 flex_freeb_ratio > free_block_ratio)
324 goto found_flexbg;
325
326 if (best_flex && best_flex == parent_fbg_group) {
327 best_flex--;
328 goto find_close_to_parent;
329 }
330
331 for (i = 0; i < n_fbg_groups; i++) {
332 if (i == parent_fbg_group || i == parent_fbg_group - 1)
333 continue;
334
335 flexbg_free_blocks = flex_group[i].free_blocks;
336 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
337
338 if (flex_freeb_ratio > free_block_ratio &&
339 flex_group[i].free_inodes) {
340 best_flex = i;
341 goto found_flexbg;
342 }
343
344 if (best_flex < 0 ||
345 (flex_group[i].free_blocks >
346 flex_group[best_flex].free_blocks &&
347 flex_group[i].free_inodes))
348 best_flex = i;
349 }
350
351 if (!flex_group[best_flex].free_inodes ||
352 !flex_group[best_flex].free_blocks)
353 return -1;
354
355found_flexbg:
356 for (i = best_flex * flex_size; i < ngroups &&
357 i < (best_flex + 1) * flex_size; i++) {
358 desc = ext4_get_group_desc(sb, i, &bh);
359 if (le16_to_cpu(desc->bg_free_inodes_count)) {
360 *best_group = i;
361 goto out;
362 }
363 }
364
365 return -1;
366out:
367 return 0;
368}
369
289/* 370/*
290 * Orlov's allocator for directories. 371 * Orlov's allocator for directories.
291 * 372 *
@@ -501,6 +582,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
501 struct inode *ret; 582 struct inode *ret;
502 ext4_group_t i; 583 ext4_group_t i;
503 int free = 0; 584 int free = 0;
585 ext4_group_t flex_group;
504 586
505 /* Cannot create files in a deleted directory */ 587 /* Cannot create files in a deleted directory */
506 if (!dir || !dir->i_nlink) 588 if (!dir || !dir->i_nlink)
@@ -514,6 +596,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
514 596
515 sbi = EXT4_SB(sb); 597 sbi = EXT4_SB(sb);
516 es = sbi->s_es; 598 es = sbi->s_es;
599
600 if (sbi->s_log_groups_per_flex) {
601 ret2 = find_group_flex(sb, dir, &group);
602 goto got_group;
603 }
604
517 if (S_ISDIR(mode)) { 605 if (S_ISDIR(mode)) {
518 if (test_opt (sb, OLDALLOC)) 606 if (test_opt (sb, OLDALLOC))
519 ret2 = find_group_dir(sb, dir, &group); 607 ret2 = find_group_dir(sb, dir, &group);
@@ -522,6 +610,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
522 } else 610 } else
523 ret2 = find_group_other(sb, dir, &group); 611 ret2 = find_group_other(sb, dir, &group);
524 612
613got_group:
525 err = -ENOSPC; 614 err = -ENOSPC;
526 if (ret2 == -1) 615 if (ret2 == -1)
527 goto out; 616 goto out;
@@ -600,7 +689,7 @@ got:
600 /* We may have to initialize the block bitmap if it isn't already */ 689 /* We may have to initialize the block bitmap if it isn't already */
601 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && 690 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
602 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 691 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
603 struct buffer_head *block_bh = read_block_bitmap(sb, group); 692 struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group);
604 693
605 BUFFER_TRACE(block_bh, "get block bitmap access"); 694 BUFFER_TRACE(block_bh, "get block bitmap access");
606 err = ext4_journal_get_write_access(handle, block_bh); 695 err = ext4_journal_get_write_access(handle, block_bh);
@@ -676,6 +765,13 @@ got:
676 percpu_counter_inc(&sbi->s_dirs_counter); 765 percpu_counter_inc(&sbi->s_dirs_counter);
677 sb->s_dirt = 1; 766 sb->s_dirt = 1;
678 767
768 if (sbi->s_log_groups_per_flex) {
769 flex_group = ext4_flex_group(sbi, group);
770 spin_lock(sb_bgl_lock(sbi, flex_group));
771 sbi->s_flex_groups[flex_group].free_inodes--;
772 spin_unlock(sb_bgl_lock(sbi, flex_group));
773 }
774
679 inode->i_uid = current->fsuid; 775 inode->i_uid = current->fsuid;
680 if (test_opt (sb, GRPID)) 776 if (test_opt (sb, GRPID))
681 inode->i_gid = dir->i_gid; 777 inode->i_gid = dir->i_gid;
@@ -740,14 +836,10 @@ got:
740 goto fail_free_drop; 836 goto fail_free_drop;
741 837
742 if (test_opt(sb, EXTENTS)) { 838 if (test_opt(sb, EXTENTS)) {
743 /* set extent flag only for diretory, file and normal symlink*/ 839 /* set extent flag only for directory, file and normal symlink*/
744 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { 840 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
745 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 841 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
746 ext4_ext_tree_init(handle, inode); 842 ext4_ext_tree_init(handle, inode);
747 err = ext4_update_incompat_feature(handle, sb,
748 EXT4_FEATURE_INCOMPAT_EXTENTS);
749 if (err)
750 goto fail_free_drop;
751 } 843 }
752 } 844 }
753 845
@@ -817,6 +909,14 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
817 if (IS_ERR(inode)) 909 if (IS_ERR(inode))
818 goto iget_failed; 910 goto iget_failed;
819 911
912 /*
913 * If the orphans has i_nlinks > 0 then it should be able to be
914 * truncated, otherwise it won't be removed from the orphan list
915 * during processing and an infinite loop will result.
916 */
917 if (inode->i_nlink && !ext4_can_truncate(inode))
918 goto bad_orphan;
919
820 if (NEXT_ORPHAN(inode) > max_ino) 920 if (NEXT_ORPHAN(inode) > max_ino)
821 goto bad_orphan; 921 goto bad_orphan;
822 brelse(bitmap_bh); 922 brelse(bitmap_bh);
@@ -838,6 +938,7 @@ bad_orphan:
838 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", 938 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
839 NEXT_ORPHAN(inode)); 939 NEXT_ORPHAN(inode));
840 printk(KERN_NOTICE "max_ino=%lu\n", max_ino); 940 printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
941 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
841 /* Avoid freeing blocks if we got a bad deleted inode */ 942 /* Avoid freeing blocks if we got a bad deleted inode */
842 if (inode->i_nlink == 0) 943 if (inode->i_nlink == 0)
843 inode->i_blocks = 0; 944 inode->i_blocks = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8d9707746413..8ca2763df091 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -32,12 +32,23 @@
32#include <linux/string.h> 32#include <linux/string.h>
33#include <linux/buffer_head.h> 33#include <linux/buffer_head.h>
34#include <linux/writeback.h> 34#include <linux/writeback.h>
35#include <linux/pagevec.h>
35#include <linux/mpage.h> 36#include <linux/mpage.h>
36#include <linux/uio.h> 37#include <linux/uio.h>
37#include <linux/bio.h> 38#include <linux/bio.h>
38#include "ext4_jbd2.h" 39#include "ext4_jbd2.h"
39#include "xattr.h" 40#include "xattr.h"
40#include "acl.h" 41#include "acl.h"
42#include "ext4_extents.h"
43
44static inline int ext4_begin_ordered_truncate(struct inode *inode,
45 loff_t new_size)
46{
47 return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
48 new_size);
49}
50
51static void ext4_invalidatepage(struct page *page, unsigned long offset);
41 52
42/* 53/*
43 * Test whether an inode is a fast symlink. 54 * Test whether an inode is a fast symlink.
@@ -181,6 +192,8 @@ void ext4_delete_inode (struct inode * inode)
181{ 192{
182 handle_t *handle; 193 handle_t *handle;
183 194
195 if (ext4_should_order_data(inode))
196 ext4_begin_ordered_truncate(inode, 0);
184 truncate_inode_pages(&inode->i_data, 0); 197 truncate_inode_pages(&inode->i_data, 0);
185 198
186 if (is_bad_inode(inode)) 199 if (is_bad_inode(inode))
@@ -508,11 +521,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
508 * direct blocks 521 * direct blocks
509 */ 522 */
510static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, 523static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
511 ext4_fsblk_t goal, int indirect_blks, int blks, 524 ext4_lblk_t iblock, ext4_fsblk_t goal,
512 ext4_fsblk_t new_blocks[4], int *err) 525 int indirect_blks, int blks,
526 ext4_fsblk_t new_blocks[4], int *err)
513{ 527{
514 int target, i; 528 int target, i;
515 unsigned long count = 0; 529 unsigned long count = 0, blk_allocated = 0;
516 int index = 0; 530 int index = 0;
517 ext4_fsblk_t current_block = 0; 531 ext4_fsblk_t current_block = 0;
518 int ret = 0; 532 int ret = 0;
@@ -525,12 +539,13 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
525 * the first direct block of this branch. That's the 539 * the first direct block of this branch. That's the
526 * minimum number of blocks need to allocate(required) 540 * minimum number of blocks need to allocate(required)
527 */ 541 */
528 target = blks + indirect_blks; 542 /* first we try to allocate the indirect blocks */
529 543 target = indirect_blks;
530 while (1) { 544 while (target > 0) {
531 count = target; 545 count = target;
532 /* allocating blocks for indirect blocks and direct blocks */ 546 /* allocating blocks for indirect blocks and direct blocks */
533 current_block = ext4_new_blocks(handle,inode,goal,&count,err); 547 current_block = ext4_new_meta_blocks(handle, inode,
548 goal, &count, err);
534 if (*err) 549 if (*err)
535 goto failed_out; 550 goto failed_out;
536 551
@@ -540,16 +555,48 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
540 new_blocks[index++] = current_block++; 555 new_blocks[index++] = current_block++;
541 count--; 556 count--;
542 } 557 }
543 558 if (count > 0) {
544 if (count > 0) 559 /*
560 * save the new block number
561 * for the first direct block
562 */
563 new_blocks[index] = current_block;
564 printk(KERN_INFO "%s returned more blocks than "
565 "requested\n", __func__);
566 WARN_ON(1);
545 break; 567 break;
568 }
546 } 569 }
547 570
548 /* save the new block number for the first direct block */ 571 target = blks - count ;
549 new_blocks[index] = current_block; 572 blk_allocated = count;
550 573 if (!target)
574 goto allocated;
575 /* Now allocate data blocks */
576 count = target;
577 /* allocating blocks for data blocks */
578 current_block = ext4_new_blocks(handle, inode, iblock,
579 goal, &count, err);
580 if (*err && (target == blks)) {
581 /*
582 * if the allocation failed and we didn't allocate
583 * any blocks before
584 */
585 goto failed_out;
586 }
587 if (!*err) {
588 if (target == blks) {
589 /*
590 * save the new block number
591 * for the first direct block
592 */
593 new_blocks[index] = current_block;
594 }
595 blk_allocated += count;
596 }
597allocated:
551 /* total number of blocks allocated for direct blocks */ 598 /* total number of blocks allocated for direct blocks */
552 ret = count; 599 ret = blk_allocated;
553 *err = 0; 600 *err = 0;
554 return ret; 601 return ret;
555failed_out: 602failed_out:
@@ -584,8 +631,9 @@ failed_out:
584 * as described above and return 0. 631 * as described above and return 0.
585 */ 632 */
586static int ext4_alloc_branch(handle_t *handle, struct inode *inode, 633static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
587 int indirect_blks, int *blks, ext4_fsblk_t goal, 634 ext4_lblk_t iblock, int indirect_blks,
588 ext4_lblk_t *offsets, Indirect *branch) 635 int *blks, ext4_fsblk_t goal,
636 ext4_lblk_t *offsets, Indirect *branch)
589{ 637{
590 int blocksize = inode->i_sb->s_blocksize; 638 int blocksize = inode->i_sb->s_blocksize;
591 int i, n = 0; 639 int i, n = 0;
@@ -595,7 +643,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
595 ext4_fsblk_t new_blocks[4]; 643 ext4_fsblk_t new_blocks[4];
596 ext4_fsblk_t current_block; 644 ext4_fsblk_t current_block;
597 645
598 num = ext4_alloc_blocks(handle, inode, goal, indirect_blks, 646 num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
599 *blks, new_blocks, &err); 647 *blks, new_blocks, &err);
600 if (err) 648 if (err)
601 return err; 649 return err;
@@ -799,6 +847,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
799 struct ext4_inode_info *ei = EXT4_I(inode); 847 struct ext4_inode_info *ei = EXT4_I(inode);
800 int count = 0; 848 int count = 0;
801 ext4_fsblk_t first_block = 0; 849 ext4_fsblk_t first_block = 0;
850 loff_t disksize;
802 851
803 852
804 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); 853 J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
@@ -855,8 +904,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
855 /* 904 /*
856 * Block out ext4_truncate while we alter the tree 905 * Block out ext4_truncate while we alter the tree
857 */ 906 */
858 err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal, 907 err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
859 offsets + (partial - chain), partial); 908 &count, goal,
909 offsets + (partial - chain), partial);
860 910
861 /* 911 /*
862 * The ext4_splice_branch call will free and forget any buffers 912 * The ext4_splice_branch call will free and forget any buffers
@@ -873,8 +923,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
873 * protect it if you're about to implement concurrent 923 * protect it if you're about to implement concurrent
874 * ext4_get_block() -bzzz 924 * ext4_get_block() -bzzz
875 */ 925 */
876 if (!err && extend_disksize && inode->i_size > ei->i_disksize) 926 if (!err && extend_disksize) {
877 ei->i_disksize = inode->i_size; 927 disksize = ((loff_t) iblock + count) << inode->i_blkbits;
928 if (disksize > i_size_read(inode))
929 disksize = i_size_read(inode);
930 if (disksize > ei->i_disksize)
931 ei->i_disksize = disksize;
932 }
878 if (err) 933 if (err)
879 goto cleanup; 934 goto cleanup;
880 935
@@ -934,7 +989,7 @@ out:
934 */ 989 */
935int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 990int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
936 unsigned long max_blocks, struct buffer_head *bh, 991 unsigned long max_blocks, struct buffer_head *bh,
937 int create, int extend_disksize) 992 int create, int extend_disksize, int flag)
938{ 993{
939 int retval; 994 int retval;
940 995
@@ -975,6 +1030,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
975 * with create == 1 flag. 1030 * with create == 1 flag.
976 */ 1031 */
977 down_write((&EXT4_I(inode)->i_data_sem)); 1032 down_write((&EXT4_I(inode)->i_data_sem));
1033
1034 /*
1035 * if the caller is from delayed allocation writeout path
1036 * we have already reserved fs blocks for allocation
1037 * let the underlying get_block() function know to
1038 * avoid double accounting
1039 */
1040 if (flag)
1041 EXT4_I(inode)->i_delalloc_reserved_flag = 1;
978 /* 1042 /*
979 * We need to check for EXT4 here because migrate 1043 * We need to check for EXT4 here because migrate
980 * could have changed the inode type in between 1044 * could have changed the inode type in between
@@ -996,6 +1060,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
996 ~EXT4_EXT_MIGRATE; 1060 ~EXT4_EXT_MIGRATE;
997 } 1061 }
998 } 1062 }
1063
1064 if (flag) {
1065 EXT4_I(inode)->i_delalloc_reserved_flag = 0;
1066 /*
1067 * Update reserved blocks/metadata blocks
1068 * after successful block allocation
1069 * which were deferred till now
1070 */
1071 if ((retval > 0) && buffer_delay(bh))
1072 ext4_da_release_space(inode, retval, 0);
1073 }
1074
999 up_write((&EXT4_I(inode)->i_data_sem)); 1075 up_write((&EXT4_I(inode)->i_data_sem));
1000 return retval; 1076 return retval;
1001} 1077}
@@ -1021,7 +1097,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
1021 } 1097 }
1022 1098
1023 ret = ext4_get_blocks_wrap(handle, inode, iblock, 1099 ret = ext4_get_blocks_wrap(handle, inode, iblock,
1024 max_blocks, bh_result, create, 0); 1100 max_blocks, bh_result, create, 0, 0);
1025 if (ret > 0) { 1101 if (ret > 0) {
1026 bh_result->b_size = (ret << inode->i_blkbits); 1102 bh_result->b_size = (ret << inode->i_blkbits);
1027 ret = 0; 1103 ret = 0;
@@ -1047,7 +1123,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1047 dummy.b_blocknr = -1000; 1123 dummy.b_blocknr = -1000;
1048 buffer_trace_init(&dummy.b_history); 1124 buffer_trace_init(&dummy.b_history);
1049 err = ext4_get_blocks_wrap(handle, inode, block, 1, 1125 err = ext4_get_blocks_wrap(handle, inode, block, 1,
1050 &dummy, create, 1); 1126 &dummy, create, 1, 0);
1051 /* 1127 /*
1052 * ext4_get_blocks_handle() returns number of blocks 1128 * ext4_get_blocks_handle() returns number of blocks
1053 * mapped. 0 in case of a HOLE. 1129 * mapped. 0 in case of a HOLE.
@@ -1203,19 +1279,20 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1203 to = from + len; 1279 to = from + len;
1204 1280
1205retry: 1281retry:
1206 page = __grab_cache_page(mapping, index);
1207 if (!page)
1208 return -ENOMEM;
1209 *pagep = page;
1210
1211 handle = ext4_journal_start(inode, needed_blocks); 1282 handle = ext4_journal_start(inode, needed_blocks);
1212 if (IS_ERR(handle)) { 1283 if (IS_ERR(handle)) {
1213 unlock_page(page);
1214 page_cache_release(page);
1215 ret = PTR_ERR(handle); 1284 ret = PTR_ERR(handle);
1216 goto out; 1285 goto out;
1217 } 1286 }
1218 1287
1288 page = __grab_cache_page(mapping, index);
1289 if (!page) {
1290 ext4_journal_stop(handle);
1291 ret = -ENOMEM;
1292 goto out;
1293 }
1294 *pagep = page;
1295
1219 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1296 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
1220 ext4_get_block); 1297 ext4_get_block);
1221 1298
@@ -1225,8 +1302,8 @@ retry:
1225 } 1302 }
1226 1303
1227 if (ret) { 1304 if (ret) {
1228 ext4_journal_stop(handle);
1229 unlock_page(page); 1305 unlock_page(page);
1306 ext4_journal_stop(handle);
1230 page_cache_release(page); 1307 page_cache_release(page);
1231 } 1308 }
1232 1309
@@ -1236,15 +1313,6 @@ out:
1236 return ret; 1313 return ret;
1237} 1314}
1238 1315
1239int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1240{
1241 int err = jbd2_journal_dirty_data(handle, bh);
1242 if (err)
1243 ext4_journal_abort_handle(__func__, __func__,
1244 bh, handle, err);
1245 return err;
1246}
1247
1248/* For write_end() in data=journal mode */ 1316/* For write_end() in data=journal mode */
1249static int write_end_fn(handle_t *handle, struct buffer_head *bh) 1317static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1250{ 1318{
@@ -1255,29 +1323,6 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1255} 1323}
1256 1324
1257/* 1325/*
1258 * Generic write_end handler for ordered and writeback ext4 journal modes.
1259 * We can't use generic_write_end, because that unlocks the page and we need to
1260 * unlock the page after ext4_journal_stop, but ext4_journal_stop must run
1261 * after block_write_end.
1262 */
1263static int ext4_generic_write_end(struct file *file,
1264 struct address_space *mapping,
1265 loff_t pos, unsigned len, unsigned copied,
1266 struct page *page, void *fsdata)
1267{
1268 struct inode *inode = file->f_mapping->host;
1269
1270 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1271
1272 if (pos+copied > inode->i_size) {
1273 i_size_write(inode, pos+copied);
1274 mark_inode_dirty(inode);
1275 }
1276
1277 return copied;
1278}
1279
1280/*
1281 * We need to pick up the new inode size which generic_commit_write gave us 1326 * We need to pick up the new inode size which generic_commit_write gave us
1282 * `file' can be NULL - eg, when called from page_symlink(). 1327 * `file' can be NULL - eg, when called from page_symlink().
1283 * 1328 *
@@ -1290,15 +1335,14 @@ static int ext4_ordered_write_end(struct file *file,
1290 struct page *page, void *fsdata) 1335 struct page *page, void *fsdata)
1291{ 1336{
1292 handle_t *handle = ext4_journal_current_handle(); 1337 handle_t *handle = ext4_journal_current_handle();
1293 struct inode *inode = file->f_mapping->host; 1338 struct inode *inode = mapping->host;
1294 unsigned from, to; 1339 unsigned from, to;
1295 int ret = 0, ret2; 1340 int ret = 0, ret2;
1296 1341
1297 from = pos & (PAGE_CACHE_SIZE - 1); 1342 from = pos & (PAGE_CACHE_SIZE - 1);
1298 to = from + len; 1343 to = from + len;
1299 1344
1300 ret = walk_page_buffers(handle, page_buffers(page), 1345 ret = ext4_jbd2_file_inode(handle, inode);
1301 from, to, NULL, ext4_journal_dirty_data);
1302 1346
1303 if (ret == 0) { 1347 if (ret == 0) {
1304 /* 1348 /*
@@ -1311,7 +1355,7 @@ static int ext4_ordered_write_end(struct file *file,
1311 new_i_size = pos + copied; 1355 new_i_size = pos + copied;
1312 if (new_i_size > EXT4_I(inode)->i_disksize) 1356 if (new_i_size > EXT4_I(inode)->i_disksize)
1313 EXT4_I(inode)->i_disksize = new_i_size; 1357 EXT4_I(inode)->i_disksize = new_i_size;
1314 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, 1358 ret2 = generic_write_end(file, mapping, pos, len, copied,
1315 page, fsdata); 1359 page, fsdata);
1316 copied = ret2; 1360 copied = ret2;
1317 if (ret2 < 0) 1361 if (ret2 < 0)
@@ -1320,8 +1364,6 @@ static int ext4_ordered_write_end(struct file *file,
1320 ret2 = ext4_journal_stop(handle); 1364 ret2 = ext4_journal_stop(handle);
1321 if (!ret) 1365 if (!ret)
1322 ret = ret2; 1366 ret = ret2;
1323 unlock_page(page);
1324 page_cache_release(page);
1325 1367
1326 return ret ? ret : copied; 1368 return ret ? ret : copied;
1327} 1369}
@@ -1332,7 +1374,7 @@ static int ext4_writeback_write_end(struct file *file,
1332 struct page *page, void *fsdata) 1374 struct page *page, void *fsdata)
1333{ 1375{
1334 handle_t *handle = ext4_journal_current_handle(); 1376 handle_t *handle = ext4_journal_current_handle();
1335 struct inode *inode = file->f_mapping->host; 1377 struct inode *inode = mapping->host;
1336 int ret = 0, ret2; 1378 int ret = 0, ret2;
1337 loff_t new_i_size; 1379 loff_t new_i_size;
1338 1380
@@ -1340,7 +1382,7 @@ static int ext4_writeback_write_end(struct file *file,
1340 if (new_i_size > EXT4_I(inode)->i_disksize) 1382 if (new_i_size > EXT4_I(inode)->i_disksize)
1341 EXT4_I(inode)->i_disksize = new_i_size; 1383 EXT4_I(inode)->i_disksize = new_i_size;
1342 1384
1343 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, 1385 ret2 = generic_write_end(file, mapping, pos, len, copied,
1344 page, fsdata); 1386 page, fsdata);
1345 copied = ret2; 1387 copied = ret2;
1346 if (ret2 < 0) 1388 if (ret2 < 0)
@@ -1349,8 +1391,6 @@ static int ext4_writeback_write_end(struct file *file,
1349 ret2 = ext4_journal_stop(handle); 1391 ret2 = ext4_journal_stop(handle);
1350 if (!ret) 1392 if (!ret)
1351 ret = ret2; 1393 ret = ret2;
1352 unlock_page(page);
1353 page_cache_release(page);
1354 1394
1355 return ret ? ret : copied; 1395 return ret ? ret : copied;
1356} 1396}
@@ -1389,14 +1429,965 @@ static int ext4_journalled_write_end(struct file *file,
1389 ret = ret2; 1429 ret = ret2;
1390 } 1430 }
1391 1431
1432 unlock_page(page);
1392 ret2 = ext4_journal_stop(handle); 1433 ret2 = ext4_journal_stop(handle);
1393 if (!ret) 1434 if (!ret)
1394 ret = ret2; 1435 ret = ret2;
1395 unlock_page(page);
1396 page_cache_release(page); 1436 page_cache_release(page);
1397 1437
1398 return ret ? ret : copied; 1438 return ret ? ret : copied;
1399} 1439}
1440/*
1441 * Calculate the number of metadata blocks need to reserve
1442 * to allocate @blocks for non extent file based file
1443 */
1444static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
1445{
1446 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1447 int ind_blks, dind_blks, tind_blks;
1448
1449 /* number of new indirect blocks needed */
1450 ind_blks = (blocks + icap - 1) / icap;
1451
1452 dind_blks = (ind_blks + icap - 1) / icap;
1453
1454 tind_blks = 1;
1455
1456 return ind_blks + dind_blks + tind_blks;
1457}
1458
1459/*
1460 * Calculate the number of metadata blocks need to reserve
1461 * to allocate given number of blocks
1462 */
1463static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1464{
1465 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1466 return ext4_ext_calc_metadata_amount(inode, blocks);
1467
1468 return ext4_indirect_calc_metadata_amount(inode, blocks);
1469}
1470
1471static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1472{
1473 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1474 unsigned long md_needed, mdblocks, total = 0;
1475
1476 /*
1477 * recalculate the amount of metadata blocks to reserve
1478 * in order to allocate nrblocks
1479 * worse case is one extent per block
1480 */
1481 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1482 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
1483 mdblocks = ext4_calc_metadata_amount(inode, total);
1484 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
1485
1486 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1487 total = md_needed + nrblocks;
1488
1489 if (ext4_has_free_blocks(sbi, total) < total) {
1490 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1491 return -ENOSPC;
1492 }
1493
1494 /* reduce fs free blocks counter */
1495 percpu_counter_sub(&sbi->s_freeblocks_counter, total);
1496
1497 EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
1498 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
1499
1500 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1501 return 0; /* success */
1502}
1503
1504void ext4_da_release_space(struct inode *inode, int used, int to_free)
1505{
1506 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1507 int total, mdb, mdb_free, release;
1508
1509 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1510 /* recalculate the number of metablocks still need to be reserved */
1511 total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
1512 mdb = ext4_calc_metadata_amount(inode, total);
1513
1514 /* figure out how many metablocks to release */
1515 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1516 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1517
1518 /* Account for allocated meta_blocks */
1519 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1520
1521 release = to_free + mdb_free;
1522
1523 /* update fs free blocks counter for truncate case */
1524 percpu_counter_add(&sbi->s_freeblocks_counter, release);
1525
1526 /* update per-inode reservations */
1527 BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
1528 EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
1529
1530 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1531 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1532 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1533 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1534}
1535
1536static void ext4_da_page_release_reservation(struct page *page,
1537 unsigned long offset)
1538{
1539 int to_release = 0;
1540 struct buffer_head *head, *bh;
1541 unsigned int curr_off = 0;
1542
1543 head = page_buffers(page);
1544 bh = head;
1545 do {
1546 unsigned int next_off = curr_off + bh->b_size;
1547
1548 if ((offset <= curr_off) && (buffer_delay(bh))) {
1549 to_release++;
1550 clear_buffer_delay(bh);
1551 }
1552 curr_off = next_off;
1553 } while ((bh = bh->b_this_page) != head);
1554 ext4_da_release_space(page->mapping->host, 0, to_release);
1555}
1556
1557/*
1558 * Delayed allocation stuff
1559 */
1560
1561struct mpage_da_data {
1562 struct inode *inode;
1563 struct buffer_head lbh; /* extent of blocks */
1564 unsigned long first_page, next_page; /* extent of pages */
1565 get_block_t *get_block;
1566 struct writeback_control *wbc;
1567};
1568
1569/*
1570 * mpage_da_submit_io - walks through extent of pages and try to write
1571 * them with __mpage_writepage()
1572 *
1573 * @mpd->inode: inode
1574 * @mpd->first_page: first page of the extent
1575 * @mpd->next_page: page after the last page of the extent
1576 * @mpd->get_block: the filesystem's block mapper function
1577 *
1578 * By the time mpage_da_submit_io() is called we expect all blocks
1579 * to be allocated. this may be wrong if allocation failed.
1580 *
1581 * As pages are already locked by write_cache_pages(), we can't use it
1582 */
1583static int mpage_da_submit_io(struct mpage_da_data *mpd)
1584{
1585 struct address_space *mapping = mpd->inode->i_mapping;
1586 struct mpage_data mpd_pp = {
1587 .bio = NULL,
1588 .last_block_in_bio = 0,
1589 .get_block = mpd->get_block,
1590 .use_writepage = 1,
1591 };
1592 int ret = 0, err, nr_pages, i;
1593 unsigned long index, end;
1594 struct pagevec pvec;
1595
1596 BUG_ON(mpd->next_page <= mpd->first_page);
1597
1598 pagevec_init(&pvec, 0);
1599 index = mpd->first_page;
1600 end = mpd->next_page - 1;
1601
1602 while (index <= end) {
1603 /* XXX: optimize tail */
1604 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1605 if (nr_pages == 0)
1606 break;
1607 for (i = 0; i < nr_pages; i++) {
1608 struct page *page = pvec.pages[i];
1609
1610 index = page->index;
1611 if (index > end)
1612 break;
1613 index++;
1614
1615 err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
1616
1617 /*
1618 * In error case, we have to continue because
1619 * remaining pages are still locked
1620 * XXX: unlock and re-dirty them?
1621 */
1622 if (ret == 0)
1623 ret = err;
1624 }
1625 pagevec_release(&pvec);
1626 }
1627 if (mpd_pp.bio)
1628 mpage_bio_submit(WRITE, mpd_pp.bio);
1629
1630 return ret;
1631}
1632
1633/*
1634 * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
1635 *
1636 * @mpd->inode - inode to walk through
1637 * @exbh->b_blocknr - first block on a disk
1638 * @exbh->b_size - amount of space in bytes
1639 * @logical - first logical block to start assignment with
1640 *
1641 * the function goes through all passed space and put actual disk
1642 * block numbers into buffer heads, dropping BH_Delay
1643 */
1644static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1645 struct buffer_head *exbh)
1646{
1647 struct inode *inode = mpd->inode;
1648 struct address_space *mapping = inode->i_mapping;
1649 int blocks = exbh->b_size >> inode->i_blkbits;
1650 sector_t pblock = exbh->b_blocknr, cur_logical;
1651 struct buffer_head *head, *bh;
1652 unsigned long index, end;
1653 struct pagevec pvec;
1654 int nr_pages, i;
1655
1656 index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
1657 end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
1658 cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1659
1660 pagevec_init(&pvec, 0);
1661
1662 while (index <= end) {
1663 /* XXX: optimize tail */
1664 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1665 if (nr_pages == 0)
1666 break;
1667 for (i = 0; i < nr_pages; i++) {
1668 struct page *page = pvec.pages[i];
1669
1670 index = page->index;
1671 if (index > end)
1672 break;
1673 index++;
1674
1675 BUG_ON(!PageLocked(page));
1676 BUG_ON(PageWriteback(page));
1677 BUG_ON(!page_has_buffers(page));
1678
1679 bh = page_buffers(page);
1680 head = bh;
1681
1682 /* skip blocks out of the range */
1683 do {
1684 if (cur_logical >= logical)
1685 break;
1686 cur_logical++;
1687 } while ((bh = bh->b_this_page) != head);
1688
1689 do {
1690 if (cur_logical >= logical + blocks)
1691 break;
1692 if (buffer_delay(bh)) {
1693 bh->b_blocknr = pblock;
1694 clear_buffer_delay(bh);
1695 } else if (buffer_mapped(bh))
1696 BUG_ON(bh->b_blocknr != pblock);
1697
1698 cur_logical++;
1699 pblock++;
1700 } while ((bh = bh->b_this_page) != head);
1701 }
1702 pagevec_release(&pvec);
1703 }
1704}
1705
1706
1707/*
1708 * __unmap_underlying_blocks - just a helper function to unmap
1709 * set of blocks described by @bh
1710 */
1711static inline void __unmap_underlying_blocks(struct inode *inode,
1712 struct buffer_head *bh)
1713{
1714 struct block_device *bdev = inode->i_sb->s_bdev;
1715 int blocks, i;
1716
1717 blocks = bh->b_size >> inode->i_blkbits;
1718 for (i = 0; i < blocks; i++)
1719 unmap_underlying_metadata(bdev, bh->b_blocknr + i);
1720}
1721
1722/*
1723 * mpage_da_map_blocks - go through given space
1724 *
1725 * @mpd->lbh - bh describing space
1726 * @mpd->get_block - the filesystem's block mapper function
1727 *
1728 * The function skips space we know is already mapped to disk blocks.
1729 *
1730 * The function ignores errors ->get_block() returns, thus real
1731 * error handling is postponed to __mpage_writepage()
1732 */
1733static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1734{
1735 struct buffer_head *lbh = &mpd->lbh;
1736 int err = 0, remain = lbh->b_size;
1737 sector_t next = lbh->b_blocknr;
1738 struct buffer_head new;
1739
1740 /*
1741 * We consider only non-mapped and non-allocated blocks
1742 */
1743 if (buffer_mapped(lbh) && !buffer_delay(lbh))
1744 return;
1745
1746 while (remain) {
1747 new.b_state = lbh->b_state;
1748 new.b_blocknr = 0;
1749 new.b_size = remain;
1750 err = mpd->get_block(mpd->inode, next, &new, 1);
1751 if (err) {
1752 /*
1753 * Rather than implement own error handling
1754 * here, we just leave remaining blocks
1755 * unallocated and try again with ->writepage()
1756 */
1757 break;
1758 }
1759 BUG_ON(new.b_size == 0);
1760
1761 if (buffer_new(&new))
1762 __unmap_underlying_blocks(mpd->inode, &new);
1763
1764 /*
1765 * If blocks are delayed marked, we need to
1766 * put actual blocknr and drop delayed bit
1767 */
1768 if (buffer_delay(lbh))
1769 mpage_put_bnr_to_bhs(mpd, next, &new);
1770
1771 /* go for the remaining blocks */
1772 next += new.b_size >> mpd->inode->i_blkbits;
1773 remain -= new.b_size;
1774 }
1775}
1776
1777#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
1778
1779/*
1780 * mpage_add_bh_to_extent - try to add one more block to extent of blocks
1781 *
1782 * @mpd->lbh - extent of blocks
1783 * @logical - logical number of the block in the file
1784 * @bh - bh of the block (used to access block's state)
1785 *
1786 * the function is used to collect contig. blocks in same state
1787 */
1788static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1789 sector_t logical, struct buffer_head *bh)
1790{
1791 struct buffer_head *lbh = &mpd->lbh;
1792 sector_t next;
1793
1794 next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
1795
1796 /*
1797 * First block in the extent
1798 */
1799 if (lbh->b_size == 0) {
1800 lbh->b_blocknr = logical;
1801 lbh->b_size = bh->b_size;
1802 lbh->b_state = bh->b_state & BH_FLAGS;
1803 return;
1804 }
1805
1806 /*
1807 * Can we merge the block to our big extent?
1808 */
1809 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
1810 lbh->b_size += bh->b_size;
1811 return;
1812 }
1813
1814 /*
1815 * We couldn't merge the block to our extent, so we
1816 * need to flush current extent and start new one
1817 */
1818 mpage_da_map_blocks(mpd);
1819
1820 /*
1821 * Now start a new extent
1822 */
1823 lbh->b_size = bh->b_size;
1824 lbh->b_state = bh->b_state & BH_FLAGS;
1825 lbh->b_blocknr = logical;
1826}
1827
/*
 * __mpage_da_writepage - finds extent of pages and blocks
 *
 * @page: page to consider
 * @wbc: not used, we just follow rules
 * @data: context (a struct mpage_da_data *)
 *
 * Callback for write_cache_pages(): accumulates runs of contiguous dirty
 * pages and their dirty blocks in @data.  When @page is not adjacent to
 * the extent collected so far, the accumulated extent is mapped and its
 * IO submitted before a new extent is started at @page.
 */
static int __mpage_da_writepage(struct page *page,
				struct writeback_control *wbc, void *data)
{
	struct mpage_da_data *mpd = data;
	struct inode *inode = mpd->inode;
	struct buffer_head *bh, *head, fake;
	sector_t logical;

	/*
	 * Can we merge this page to current extent?
	 */
	if (mpd->next_page != page->index) {
		/*
		 * Nope, we can't. So, we map non-allocated blocks
		 * and start IO on them using __mpage_writepage()
		 */
		if (mpd->next_page != mpd->first_page) {
			mpage_da_map_blocks(mpd);
			mpage_da_submit_io(mpd);
		}

		/*
		 * Start next extent of pages ...
		 */
		mpd->first_page = page->index;

		/*
		 * ... and blocks (empty extent: b_size == 0)
		 */
		mpd->lbh.b_size = 0;
		mpd->lbh.b_state = 0;
		mpd->lbh.b_blocknr = 0;
	}

	mpd->next_page = page->index + 1;
	/* first block of this page, in filesystem-block units */
	logical = (sector_t) page->index <<
		  (PAGE_CACHE_SHIFT - inode->i_blkbits);

	if (!page_has_buffers(page)) {
		/*
		 * There are no attached buffer heads yet (mmap?);
		 * we treat the page as full of dirty blocks, using a
		 * fake on-stack buffer head spanning the whole page.
		 */
		bh = &fake;
		bh->b_size = PAGE_CACHE_SIZE;
		bh->b_state = 0;
		set_buffer_dirty(bh);
		set_buffer_uptodate(bh);
		mpage_add_bh_to_extent(mpd, logical, bh);
	} else {
		/*
		 * Page with regular buffer heads, just add all dirty ones
		 */
		head = page_buffers(page);
		bh = head;
		do {
			BUG_ON(buffer_locked(bh));
			if (buffer_dirty(bh))
				mpage_add_bh_to_extent(mpd, logical, bh);
			logical++;
		} while ((bh = bh->b_this_page) != head);
	}

	return 0;
}
1902
1903/*
1904 * mpage_da_writepages - walk the list of dirty pages of the given
1905 * address space, allocates non-allocated blocks, maps newly-allocated
1906 * blocks to existing bhs and issue IO them
1907 *
1908 * @mapping: address space structure to write
1909 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
1910 * @get_block: the filesystem's block mapper function.
1911 *
1912 * This is a library function, which implements the writepages()
1913 * address_space_operation.
1914 *
1915 * In order to avoid duplication of logic that deals with partial pages,
1916 * multiple bio per page, etc, we find non-allocated blocks, allocate
1917 * them with minimal calls to ->get_block() and re-use __mpage_writepage()
1918 *
1919 * It's important that we call __mpage_writepage() only once for each
1920 * involved page, otherwise we'd have to implement more complicated logic
1921 * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
1922 *
1923 * See comments to mpage_writepages()
1924 */
1925static int mpage_da_writepages(struct address_space *mapping,
1926 struct writeback_control *wbc,
1927 get_block_t get_block)
1928{
1929 struct mpage_da_data mpd;
1930 int ret;
1931
1932 if (!get_block)
1933 return generic_writepages(mapping, wbc);
1934
1935 mpd.wbc = wbc;
1936 mpd.inode = mapping->host;
1937 mpd.lbh.b_size = 0;
1938 mpd.lbh.b_state = 0;
1939 mpd.lbh.b_blocknr = 0;
1940 mpd.first_page = 0;
1941 mpd.next_page = 0;
1942 mpd.get_block = get_block;
1943
1944 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
1945
1946 /*
1947 * Handle last extent of pages
1948 */
1949 if (mpd.next_page != mpd.first_page) {
1950 mpage_da_map_blocks(&mpd);
1951 mpage_da_submit_io(&mpd);
1952 }
1953
1954 return ret;
1955}
1956
/*
 * this is a special callback for ->write_begin() only
 * its intention is to return a mapped block or reserve space for one
 *
 * On success the buffer is either mapped to an existing/preallocated
 * block, or marked new+delay with the real block number deferred to
 * writeback time (delayed allocation).
 */
static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create)
{
	int ret = 0;

	/* only ever called for writes, one filesystem block at a time */
	BUG_ON(create == 0);
	BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);

	/*
	 * first, we need to know whether the block is allocated already
	 * preallocated blocks are unmapped but should be treated
	 * the same as allocated blocks.
	 */
	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0);
	if ((ret == 0) && !buffer_delay(bh_result)) {
		/* the block isn't (pre)allocated yet, let's reserve space */
		/*
		 * XXX: __block_prepare_write() unmaps passed block,
		 * is it OK?
		 */
		ret = ext4_da_reserve_space(inode, 1);
		if (ret)
			/* not enough space to reserve */
			return ret;

		/*
		 * Map to block 0 and mark new+delay: the real block
		 * number is assigned later, during writeback.
		 */
		map_bh(bh_result, inode->i_sb, 0);
		set_buffer_new(bh_result);
		set_buffer_delay(bh_result);
	} else if (ret > 0) {
		/* found @ret blocks; report their total size and success */
		bh_result->b_size = (ret << inode->i_blkbits);
		ret = 0;
	}

	return ret;
}
#define EXT4_DELALLOC_RSVED 1
/*
 * Block mapper used by the delalloc writeback path: allocates blocks
 * for delayed buffers and pushes i_disksize forward to cover them.
 * With no running journal handle, no allocation may happen — blocks
 * are only looked up (and are expected to exist already).
 */
static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	int ret;
	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
	loff_t disksize = EXT4_I(inode)->i_disksize;
	handle_t *handle = NULL;

	handle = ext4_journal_current_handle();
	if (!handle) {
		/*
		 * No transaction running: lookup only (create == 0).
		 * A zero result here would mean an unallocated block on
		 * this path, which the BUG_ON treats as impossible.
		 */
		ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
					   bh_result, 0, 0, 0);
		BUG_ON(!ret);
	} else {
		ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
					   bh_result, create, 0, EXT4_DELALLOC_RSVED);
	}

	if (ret > 0) {
		bh_result->b_size = (ret << inode->i_blkbits);

		/*
		 * Update on-disk size along with block allocation
		 * we don't use 'extend_disksize' as size may change
		 * within already allocated block -bzzz
		 */
		disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
		if (disksize > i_size_read(inode))
			disksize = i_size_read(inode);
		if (disksize > EXT4_I(inode)->i_disksize) {
			/*
			 * XXX: replace with spinlock if seen contended -bzzz
			 */
			down_write(&EXT4_I(inode)->i_data_sem);
			if (disksize > EXT4_I(inode)->i_disksize)
				EXT4_I(inode)->i_disksize = disksize;
			up_write(&EXT4_I(inode)->i_data_sem);

			/* i_disksize grew under us: log the inode update */
			if (EXT4_I(inode)->i_disksize == disksize) {
				ret = ext4_mark_inode_dirty(handle, inode);
				return ret;
			}
		}
		ret = 0;
	}
	return ret;
}
2044
2045static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2046{
2047 /*
2048 * unmapped buffer is possible for holes.
2049 * delay buffer is possible with delayed allocation
2050 */
2051 return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
2052}
2053
2054static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
2055 struct buffer_head *bh_result, int create)
2056{
2057 int ret = 0;
2058 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
2059
2060 /*
2061 * we don't want to do block allocation in writepage
2062 * so call get_block_wrap with create = 0
2063 */
2064 ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
2065 bh_result, 0, 0, 0);
2066 if (ret > 0) {
2067 bh_result->b_size = (ret << inode->i_blkbits);
2068 ret = 0;
2069 }
2070 return ret;
2071}
2072
/*
 * Writepage for delalloc inodes.  Reached:
 *  - via ext4_da_writepages after taking page lock (have journal handle)
 *  - via journal_submit_inode_data_buffers (no journal handle)
 *  - via shrink_page_list via pdflush (no journal handle)
 *  - or grab_page_cache when doing write_begin (have journal handle)
 *
 * Never allocates blocks: pages with unmapped/delayed dirty buffers are
 * redirtied and left for the allocating writepages path.
 */
static int ext4_da_writepage(struct page *page,
				struct writeback_control *wbc)
{
	int ret = 0;
	loff_t size;
	unsigned long len;
	struct buffer_head *page_bufs;
	struct inode *inode = page->mapping->host;

	/* number of valid bytes in this page (partial for the last page) */
	size = i_size_read(inode);
	if (page->index == size >> PAGE_CACHE_SHIFT)
		len = size & ~PAGE_CACHE_MASK;
	else
		len = PAGE_CACHE_SIZE;

	if (page_has_buffers(page)) {
		page_bufs = page_buffers(page);
		if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
					ext4_bh_unmapped_or_delay)) {
			/*
			 * We don't want to do block allocation
			 * So redirty the page and return
			 * We may reach here when we do a journal commit
			 * via journal_submit_inode_data_buffers.
			 * If we don't have mapping block we just ignore
			 * them. We can also reach here via shrink_page_list
			 */
			redirty_page_for_writepage(wbc, page);
			unlock_page(page);
			return 0;
		}
	} else {
		/*
		 * The test for page_has_buffers() is subtle:
		 * We know the page is dirty but it lost buffers. That means
		 * that at some moment in time after write_begin()/write_end()
		 * has been called all buffers have been clean and thus they
		 * must have been written at least once. So they are all
		 * mapped and we can happily proceed with mapping them
		 * and writing the page.
		 *
		 * Try to initialize the buffer_heads and check whether
		 * all are mapped and non delay. We don't want to
		 * do block allocation here.
		 */
		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
					ext4_normal_get_block_write);
		if (!ret) {
			page_bufs = page_buffers(page);
			/* check whether all are mapped and non delay */
			if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
						ext4_bh_unmapped_or_delay)) {
				redirty_page_for_writepage(wbc, page);
				unlock_page(page);
				return 0;
			}
		} else {
			/*
			 * We can't do block allocation here
			 * so just redirty the page and unlock
			 * and return
			 */
			redirty_page_for_writepage(wbc, page);
			unlock_page(page);
			return 0;
		}
	}

	/* all buffers mapped: safe to write the page out now */
	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
		ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
	else
		ret = block_write_full_page(page,
					ext4_normal_get_block_write,
					wbc);

	return ret;
}
2156
2157/*
2158 * For now just follow the DIO way to estimate the max credits
2159 * needed to write out EXT4_MAX_WRITEBACK_PAGES.
2160 * todo: need to calculate the max credits need for
2161 * extent based files, currently the DIO credits is based on
2162 * indirect-blocks mapping way.
2163 *
2164 * Probably should have a generic way to calculate credits
2165 * for DIO, writepages, and truncate
2166 */
2167#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS
2168#define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS
2169
/*
 * Delalloc writepages: writes dirty pages in chunks, each chunk inside
 * its own journal transaction so block allocation fits the reserved
 * credits.
 */
static int ext4_da_writepages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	handle_t *handle = NULL;
	int needed_blocks;
	int ret = 0;
	long to_write;
	loff_t range_start = 0;

	/*
	 * No pages to write? This is mainly a kludge to avoid starting
	 * a transaction for special inodes like journal inode on last iput()
	 * because that could violate lock ordering on umount
	 */
	if (!mapping->nrpages)
		return 0;

	/*
	 * Estimate the worst case needed credits to write out
	 * EXT4_MAX_WRITEBACK_PAGES pages
	 */
	needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;

	to_write = wbc->nr_to_write;
	if (!wbc->range_cyclic) {
		/*
		 * If range_cyclic is not set force range_cont
		 * and save the old writeback_index
		 */
		wbc->range_cont = 1;
		range_start = wbc->range_start;
	}

	while (!ret && to_write) {
		/* start a new transaction */
		handle = ext4_journal_start(inode, needed_blocks);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out_writepages;
		}
		if (ext4_should_order_data(inode)) {
			/*
			 * With ordered mode we need to add
			 * the inode to the journal handle
			 * when we do block allocation.
			 */
			ret = ext4_jbd2_file_inode(handle, inode);
			if (ret) {
				ext4_journal_stop(handle);
				goto out_writepages;
			}

		}
		/*
		 * set the max dirty pages that could be written at a time
		 * to fit into the reserved transaction credits
		 */
		if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
			wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;

		to_write -= wbc->nr_to_write;
		ret = mpage_da_writepages(mapping, wbc,
						ext4_da_get_block_write);
		ext4_journal_stop(handle);
		if (wbc->nr_to_write) {
			/*
			 * There is no more writeout needed
			 * or we requested for a noblocking writeout
			 * and we found the device congested
			 */
			to_write += wbc->nr_to_write;
			break;
		}
		wbc->nr_to_write = to_write;
	}

out_writepages:
	/* report how much work remains to the caller */
	wbc->nr_to_write = to_write;
	/* NOTE(review): range_start is only restored when non-zero — verify intended */
	if (range_start)
		wbc->range_start = range_start;
	return ret;
}
2253
2254static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2255 loff_t pos, unsigned len, unsigned flags,
2256 struct page **pagep, void **fsdata)
2257{
2258 int ret, retries = 0;
2259 struct page *page;
2260 pgoff_t index;
2261 unsigned from, to;
2262 struct inode *inode = mapping->host;
2263 handle_t *handle;
2264
2265 index = pos >> PAGE_CACHE_SHIFT;
2266 from = pos & (PAGE_CACHE_SIZE - 1);
2267 to = from + len;
2268
2269retry:
2270 /*
2271 * With delayed allocation, we don't log the i_disksize update
2272 * if there is delayed block allocation. But we still need
2273 * to journalling the i_disksize update if writes to the end
2274 * of file which has an already mapped buffer.
2275 */
2276 handle = ext4_journal_start(inode, 1);
2277 if (IS_ERR(handle)) {
2278 ret = PTR_ERR(handle);
2279 goto out;
2280 }
2281
2282 page = __grab_cache_page(mapping, index);
2283 if (!page)
2284 return -ENOMEM;
2285 *pagep = page;
2286
2287 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
2288 ext4_da_get_block_prep);
2289 if (ret < 0) {
2290 unlock_page(page);
2291 ext4_journal_stop(handle);
2292 page_cache_release(page);
2293 }
2294
2295 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
2296 goto retry;
2297out:
2298 return ret;
2299}
2300
2301/*
2302 * Check if we should update i_disksize
2303 * when write to the end of file but not require block allocation
2304 */
2305static int ext4_da_should_update_i_disksize(struct page *page,
2306 unsigned long offset)
2307{
2308 struct buffer_head *bh;
2309 struct inode *inode = page->mapping->host;
2310 unsigned int idx;
2311 int i;
2312
2313 bh = page_buffers(page);
2314 idx = offset >> inode->i_blkbits;
2315
2316 for (i=0; i < idx; i++)
2317 bh = bh->b_this_page;
2318
2319 if (!buffer_mapped(bh) || (buffer_delay(bh)))
2320 return 0;
2321 return 1;
2322}
2323
/*
 * ->write_end for delalloc: updates i_disksize when the write extended
 * the file over already-mapped (non-delayed) buffers, then finishes via
 * generic_write_end() and stops the handle opened in write_begin.
 */
static int ext4_da_write_end(struct file *file,
				struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	int ret = 0, ret2;
	handle_t *handle = ext4_journal_current_handle();
	loff_t new_i_size;
	unsigned long start, end;

	/* byte range within the page that was actually copied */
	start = pos & (PAGE_CACHE_SIZE - 1);
	end = start + copied - 1;

	/*
	 * generic_write_end() will run mark_inode_dirty() if i_size
	 * changes. So let's piggyback the i_disksize mark_inode_dirty
	 * into that.
	 */

	new_i_size = pos + copied;
	if (new_i_size > EXT4_I(inode)->i_disksize) {
		if (ext4_da_should_update_i_disksize(page, end)) {
			/* serialize i_disksize updates */
			down_write(&EXT4_I(inode)->i_data_sem);
			if (new_i_size > EXT4_I(inode)->i_disksize) {
				/*
				 * Updating i_disksize when extending file
				 * without needing block allocation
				 */
				if (ext4_should_order_data(inode))
					ret = ext4_jbd2_file_inode(handle,
								   inode);

				EXT4_I(inode)->i_disksize = new_i_size;
			}
			up_write(&EXT4_I(inode)->i_data_sem);
		}
	}
	ret2 = generic_write_end(file, mapping, pos, len, copied,
				 page, fsdata);
	copied = ret2;
	if (ret2 < 0)
		ret = ret2;
	/* close the handle opened in ext4_da_write_begin() */
	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;

	return ret ? ret : copied;
}
2373
/*
 * ->invalidatepage for delalloc: give back any blocks reserved for
 * delayed allocation on this page before invalidating it.
 */
static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
{
	BUG_ON(!PageLocked(page));

	/* reservations are tracked per buffer head; none means nothing to drop */
	if (page_has_buffers(page))
		ext4_da_page_release_reservation(page, offset);

	ext4_invalidatepage(page, offset);
}
2390
1400 2391
1401/* 2392/*
1402 * bmap() is special. It gets used by applications such as lilo and by 2393 * bmap() is special. It gets used by applications such as lilo and by
@@ -1418,6 +2409,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
1418 journal_t *journal; 2409 journal_t *journal;
1419 int err; 2410 int err;
1420 2411
2412 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
2413 test_opt(inode->i_sb, DELALLOC)) {
2414 /*
2415 * With delalloc we want to sync the file
2416 * so that we can make sure we allocate
2417 * blocks for file
2418 */
2419 filemap_write_and_wait(mapping);
2420 }
2421
1421 if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { 2422 if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
1422 /* 2423 /*
1423 * This is a REALLY heavyweight approach, but the use of 2424 * This is a REALLY heavyweight approach, but the use of
@@ -1462,21 +2463,17 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
1462 return 0; 2463 return 0;
1463} 2464}
1464 2465
1465static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
1466{
1467 if (buffer_mapped(bh))
1468 return ext4_journal_dirty_data(handle, bh);
1469 return 0;
1470}
1471
1472/* 2466/*
1473 * Note that we always start a transaction even if we're not journalling 2467 * Note that we don't need to start a transaction unless we're journaling data
1474 * data. This is to preserve ordering: any hole instantiation within 2468 * because we should have holes filled from ext4_page_mkwrite(). We even don't
1475 * __block_write_full_page -> ext4_get_block() should be journalled 2469 * need to file the inode to the transaction's list in ordered mode because if
1476 * along with the data so we don't crash and then get metadata which 2470 * we are writing back data added by write(), the inode is already there and if
1477 * refers to old data. 2471 * we are writing back data modified via mmap(), noone guarantees in which
2472 * transaction the data will hit the disk. In case we are journaling data, we
2473 * cannot start transaction directly because transaction start ranks above page
2474 * lock so we have to do some magic.
1478 * 2475 *
1479 * In all journalling modes block_write_full_page() will start the I/O. 2476 * In all journaling modes block_write_full_page() will start the I/O.
1480 * 2477 *
1481 * Problem: 2478 * Problem:
1482 * 2479 *
@@ -1518,105 +2515,103 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
1518 * disastrous. Any write() or metadata operation will sync the fs for 2515 * disastrous. Any write() or metadata operation will sync the fs for
1519 * us. 2516 * us.
1520 * 2517 *
1521 * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
1522 * we don't need to open a transaction here.
1523 */ 2518 */
1524static int ext4_ordered_writepage(struct page *page, 2519static int __ext4_normal_writepage(struct page *page,
1525 struct writeback_control *wbc) 2520 struct writeback_control *wbc)
1526{ 2521{
1527 struct inode *inode = page->mapping->host; 2522 struct inode *inode = page->mapping->host;
1528 struct buffer_head *page_bufs;
1529 handle_t *handle = NULL;
1530 int ret = 0;
1531 int err;
1532
1533 J_ASSERT(PageLocked(page));
1534
1535 /*
1536 * We give up here if we're reentered, because it might be for a
1537 * different filesystem.
1538 */
1539 if (ext4_journal_current_handle())
1540 goto out_fail;
1541 2523
1542 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); 2524 if (test_opt(inode->i_sb, NOBH))
2525 return nobh_writepage(page,
2526 ext4_normal_get_block_write, wbc);
2527 else
2528 return block_write_full_page(page,
2529 ext4_normal_get_block_write,
2530 wbc);
2531}
1543 2532
1544 if (IS_ERR(handle)) { 2533static int ext4_normal_writepage(struct page *page,
1545 ret = PTR_ERR(handle); 2534 struct writeback_control *wbc)
1546 goto out_fail; 2535{
1547 } 2536 struct inode *inode = page->mapping->host;
2537 loff_t size = i_size_read(inode);
2538 loff_t len;
1548 2539
1549 if (!page_has_buffers(page)) { 2540 J_ASSERT(PageLocked(page));
1550 create_empty_buffers(page, inode->i_sb->s_blocksize, 2541 if (page->index == size >> PAGE_CACHE_SHIFT)
1551 (1 << BH_Dirty)|(1 << BH_Uptodate)); 2542 len = size & ~PAGE_CACHE_MASK;
2543 else
2544 len = PAGE_CACHE_SIZE;
2545
2546 if (page_has_buffers(page)) {
2547 /* if page has buffers it should all be mapped
2548 * and allocated. If there are not buffers attached
2549 * to the page we know the page is dirty but it lost
2550 * buffers. That means that at some moment in time
2551 * after write_begin() / write_end() has been called
2552 * all buffers have been clean and thus they must have been
2553 * written at least once. So they are all mapped and we can
2554 * happily proceed with mapping them and writing the page.
2555 */
2556 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
2557 ext4_bh_unmapped_or_delay));
1552 } 2558 }
1553 page_bufs = page_buffers(page);
1554 walk_page_buffers(handle, page_bufs, 0,
1555 PAGE_CACHE_SIZE, NULL, bget_one);
1556
1557 ret = block_write_full_page(page, ext4_get_block, wbc);
1558 2559
1559 /* 2560 if (!ext4_journal_current_handle())
1560 * The page can become unlocked at any point now, and 2561 return __ext4_normal_writepage(page, wbc);
1561 * truncate can then come in and change things. So we
1562 * can't touch *page from now on. But *page_bufs is
1563 * safe due to elevated refcount.
1564 */
1565 2562
1566 /*
1567 * And attach them to the current transaction. But only if
1568 * block_write_full_page() succeeded. Otherwise they are unmapped,
1569 * and generally junk.
1570 */
1571 if (ret == 0) {
1572 err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
1573 NULL, jbd2_journal_dirty_data_fn);
1574 if (!ret)
1575 ret = err;
1576 }
1577 walk_page_buffers(handle, page_bufs, 0,
1578 PAGE_CACHE_SIZE, NULL, bput_one);
1579 err = ext4_journal_stop(handle);
1580 if (!ret)
1581 ret = err;
1582 return ret;
1583
1584out_fail:
1585 redirty_page_for_writepage(wbc, page); 2563 redirty_page_for_writepage(wbc, page);
1586 unlock_page(page); 2564 unlock_page(page);
1587 return ret; 2565 return 0;
1588} 2566}
1589 2567
1590static int ext4_writeback_writepage(struct page *page, 2568static int __ext4_journalled_writepage(struct page *page,
1591 struct writeback_control *wbc) 2569 struct writeback_control *wbc)
1592{ 2570{
1593 struct inode *inode = page->mapping->host; 2571 struct address_space *mapping = page->mapping;
2572 struct inode *inode = mapping->host;
2573 struct buffer_head *page_bufs;
1594 handle_t *handle = NULL; 2574 handle_t *handle = NULL;
1595 int ret = 0; 2575 int ret = 0;
1596 int err; 2576 int err;
1597 2577
1598 if (ext4_journal_current_handle()) 2578 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
1599 goto out_fail; 2579 ext4_normal_get_block_write);
2580 if (ret != 0)
2581 goto out_unlock;
2582
2583 page_bufs = page_buffers(page);
2584 walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
2585 bget_one);
2586 /* As soon as we unlock the page, it can go away, but we have
2587 * references to buffers so we are safe */
2588 unlock_page(page);
1600 2589
1601 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); 2590 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
1602 if (IS_ERR(handle)) { 2591 if (IS_ERR(handle)) {
1603 ret = PTR_ERR(handle); 2592 ret = PTR_ERR(handle);
1604 goto out_fail; 2593 goto out;
1605 } 2594 }
1606 2595
1607 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2596 ret = walk_page_buffers(handle, page_bufs, 0,
1608 ret = nobh_writepage(page, ext4_get_block, wbc); 2597 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
1609 else
1610 ret = block_write_full_page(page, ext4_get_block, wbc);
1611 2598
2599 err = walk_page_buffers(handle, page_bufs, 0,
2600 PAGE_CACHE_SIZE, NULL, write_end_fn);
2601 if (ret == 0)
2602 ret = err;
1612 err = ext4_journal_stop(handle); 2603 err = ext4_journal_stop(handle);
1613 if (!ret) 2604 if (!ret)
1614 ret = err; 2605 ret = err;
1615 return ret;
1616 2606
1617out_fail: 2607 walk_page_buffers(handle, page_bufs, 0,
1618 redirty_page_for_writepage(wbc, page); 2608 PAGE_CACHE_SIZE, NULL, bput_one);
2609 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
2610 goto out;
2611
2612out_unlock:
1619 unlock_page(page); 2613 unlock_page(page);
2614out:
1620 return ret; 2615 return ret;
1621} 2616}
1622 2617
@@ -1624,59 +2619,53 @@ static int ext4_journalled_writepage(struct page *page,
1624 struct writeback_control *wbc) 2619 struct writeback_control *wbc)
1625{ 2620{
1626 struct inode *inode = page->mapping->host; 2621 struct inode *inode = page->mapping->host;
1627 handle_t *handle = NULL; 2622 loff_t size = i_size_read(inode);
1628 int ret = 0; 2623 loff_t len;
1629 int err;
1630 2624
1631 if (ext4_journal_current_handle()) 2625 J_ASSERT(PageLocked(page));
1632 goto no_write; 2626 if (page->index == size >> PAGE_CACHE_SHIFT)
2627 len = size & ~PAGE_CACHE_MASK;
2628 else
2629 len = PAGE_CACHE_SIZE;
2630
2631 if (page_has_buffers(page)) {
2632 /* if page has buffers it should all be mapped
2633 * and allocated. If there are not buffers attached
2634 * to the page we know the page is dirty but it lost
2635 * buffers. That means that at some moment in time
2636 * after write_begin() / write_end() has been called
2637 * all buffers have been clean and thus they must have been
2638 * written at least once. So they are all mapped and we can
2639 * happily proceed with mapping them and writing the page.
2640 */
2641 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
2642 ext4_bh_unmapped_or_delay));
2643 }
1633 2644
1634 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); 2645 if (ext4_journal_current_handle())
1635 if (IS_ERR(handle)) {
1636 ret = PTR_ERR(handle);
1637 goto no_write; 2646 goto no_write;
1638 }
1639 2647
1640 if (!page_has_buffers(page) || PageChecked(page)) { 2648 if (PageChecked(page)) {
1641 /* 2649 /*
1642 * It's mmapped pagecache. Add buffers and journal it. There 2650 * It's mmapped pagecache. Add buffers and journal it. There
1643 * doesn't seem much point in redirtying the page here. 2651 * doesn't seem much point in redirtying the page here.
1644 */ 2652 */
1645 ClearPageChecked(page); 2653 ClearPageChecked(page);
1646 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 2654 return __ext4_journalled_writepage(page, wbc);
1647 ext4_get_block);
1648 if (ret != 0) {
1649 ext4_journal_stop(handle);
1650 goto out_unlock;
1651 }
1652 ret = walk_page_buffers(handle, page_buffers(page), 0,
1653 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
1654
1655 err = walk_page_buffers(handle, page_buffers(page), 0,
1656 PAGE_CACHE_SIZE, NULL, write_end_fn);
1657 if (ret == 0)
1658 ret = err;
1659 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
1660 unlock_page(page);
1661 } else { 2655 } else {
1662 /* 2656 /*
1663 * It may be a page full of checkpoint-mode buffers. We don't 2657 * It may be a page full of checkpoint-mode buffers. We don't
1664 * really know unless we go poke around in the buffer_heads. 2658 * really know unless we go poke around in the buffer_heads.
1665 * But block_write_full_page will do the right thing. 2659 * But block_write_full_page will do the right thing.
1666 */ 2660 */
1667 ret = block_write_full_page(page, ext4_get_block, wbc); 2661 return block_write_full_page(page,
2662 ext4_normal_get_block_write,
2663 wbc);
1668 } 2664 }
1669 err = ext4_journal_stop(handle);
1670 if (!ret)
1671 ret = err;
1672out:
1673 return ret;
1674
1675no_write: 2665no_write:
1676 redirty_page_for_writepage(wbc, page); 2666 redirty_page_for_writepage(wbc, page);
1677out_unlock:
1678 unlock_page(page); 2667 unlock_page(page);
1679 goto out; 2668 return 0;
1680} 2669}
1681 2670
1682static int ext4_readpage(struct file *file, struct page *page) 2671static int ext4_readpage(struct file *file, struct page *page)
@@ -1819,7 +2808,7 @@ static int ext4_journalled_set_page_dirty(struct page *page)
1819static const struct address_space_operations ext4_ordered_aops = { 2808static const struct address_space_operations ext4_ordered_aops = {
1820 .readpage = ext4_readpage, 2809 .readpage = ext4_readpage,
1821 .readpages = ext4_readpages, 2810 .readpages = ext4_readpages,
1822 .writepage = ext4_ordered_writepage, 2811 .writepage = ext4_normal_writepage,
1823 .sync_page = block_sync_page, 2812 .sync_page = block_sync_page,
1824 .write_begin = ext4_write_begin, 2813 .write_begin = ext4_write_begin,
1825 .write_end = ext4_ordered_write_end, 2814 .write_end = ext4_ordered_write_end,
@@ -1833,7 +2822,7 @@ static const struct address_space_operations ext4_ordered_aops = {
1833static const struct address_space_operations ext4_writeback_aops = { 2822static const struct address_space_operations ext4_writeback_aops = {
1834 .readpage = ext4_readpage, 2823 .readpage = ext4_readpage,
1835 .readpages = ext4_readpages, 2824 .readpages = ext4_readpages,
1836 .writepage = ext4_writeback_writepage, 2825 .writepage = ext4_normal_writepage,
1837 .sync_page = block_sync_page, 2826 .sync_page = block_sync_page,
1838 .write_begin = ext4_write_begin, 2827 .write_begin = ext4_write_begin,
1839 .write_end = ext4_writeback_write_end, 2828 .write_end = ext4_writeback_write_end,
@@ -1857,10 +2846,31 @@ static const struct address_space_operations ext4_journalled_aops = {
1857 .releasepage = ext4_releasepage, 2846 .releasepage = ext4_releasepage,
1858}; 2847};
1859 2848
2849static const struct address_space_operations ext4_da_aops = {
2850 .readpage = ext4_readpage,
2851 .readpages = ext4_readpages,
2852 .writepage = ext4_da_writepage,
2853 .writepages = ext4_da_writepages,
2854 .sync_page = block_sync_page,
2855 .write_begin = ext4_da_write_begin,
2856 .write_end = ext4_da_write_end,
2857 .bmap = ext4_bmap,
2858 .invalidatepage = ext4_da_invalidatepage,
2859 .releasepage = ext4_releasepage,
2860 .direct_IO = ext4_direct_IO,
2861 .migratepage = buffer_migrate_page,
2862};
2863
1860void ext4_set_aops(struct inode *inode) 2864void ext4_set_aops(struct inode *inode)
1861{ 2865{
1862 if (ext4_should_order_data(inode)) 2866 if (ext4_should_order_data(inode) &&
2867 test_opt(inode->i_sb, DELALLOC))
2868 inode->i_mapping->a_ops = &ext4_da_aops;
2869 else if (ext4_should_order_data(inode))
1863 inode->i_mapping->a_ops = &ext4_ordered_aops; 2870 inode->i_mapping->a_ops = &ext4_ordered_aops;
2871 else if (ext4_should_writeback_data(inode) &&
2872 test_opt(inode->i_sb, DELALLOC))
2873 inode->i_mapping->a_ops = &ext4_da_aops;
1864 else if (ext4_should_writeback_data(inode)) 2874 else if (ext4_should_writeback_data(inode))
1865 inode->i_mapping->a_ops = &ext4_writeback_aops; 2875 inode->i_mapping->a_ops = &ext4_writeback_aops;
1866 else 2876 else
@@ -1873,7 +2883,7 @@ void ext4_set_aops(struct inode *inode)
1873 * This required during truncate. We need to physically zero the tail end 2883 * This required during truncate. We need to physically zero the tail end
1874 * of that block so it doesn't yield old data if the file is later grown. 2884 * of that block so it doesn't yield old data if the file is later grown.
1875 */ 2885 */
1876int ext4_block_truncate_page(handle_t *handle, struct page *page, 2886int ext4_block_truncate_page(handle_t *handle,
1877 struct address_space *mapping, loff_t from) 2887 struct address_space *mapping, loff_t from)
1878{ 2888{
1879 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; 2889 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
@@ -1882,8 +2892,13 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
1882 ext4_lblk_t iblock; 2892 ext4_lblk_t iblock;
1883 struct inode *inode = mapping->host; 2893 struct inode *inode = mapping->host;
1884 struct buffer_head *bh; 2894 struct buffer_head *bh;
2895 struct page *page;
1885 int err = 0; 2896 int err = 0;
1886 2897
2898 page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT);
2899 if (!page)
2900 return -EINVAL;
2901
1887 blocksize = inode->i_sb->s_blocksize; 2902 blocksize = inode->i_sb->s_blocksize;
1888 length = blocksize - (offset & (blocksize - 1)); 2903 length = blocksize - (offset & (blocksize - 1));
1889 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 2904 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
@@ -1956,7 +2971,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
1956 err = ext4_journal_dirty_metadata(handle, bh); 2971 err = ext4_journal_dirty_metadata(handle, bh);
1957 } else { 2972 } else {
1958 if (ext4_should_order_data(inode)) 2973 if (ext4_should_order_data(inode))
1959 err = ext4_journal_dirty_data(handle, bh); 2974 err = ext4_jbd2_file_inode(handle, inode);
1960 mark_buffer_dirty(bh); 2975 mark_buffer_dirty(bh);
1961 } 2976 }
1962 2977
@@ -2179,7 +3194,21 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
2179 3194
2180 if (this_bh) { 3195 if (this_bh) {
2181 BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); 3196 BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata");
2182 ext4_journal_dirty_metadata(handle, this_bh); 3197
3198 /*
3199 * The buffer head should have an attached journal head at this
3200 * point. However, if the data is corrupted and an indirect
3201 * block pointed to itself, it would have been detached when
3202 * the block was cleared. Check for this instead of OOPSing.
3203 */
3204 if (bh2jh(this_bh))
3205 ext4_journal_dirty_metadata(handle, this_bh);
3206 else
3207 ext4_error(inode->i_sb, __func__,
3208 "circular indirect block detected, "
3209 "inode=%lu, block=%llu",
3210 inode->i_ino,
3211 (unsigned long long) this_bh->b_blocknr);
2183 } 3212 }
2184} 3213}
2185 3214
@@ -2305,6 +3334,19 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
2305 } 3334 }
2306} 3335}
2307 3336
3337int ext4_can_truncate(struct inode *inode)
3338{
3339 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
3340 return 0;
3341 if (S_ISREG(inode->i_mode))
3342 return 1;
3343 if (S_ISDIR(inode->i_mode))
3344 return 1;
3345 if (S_ISLNK(inode->i_mode))
3346 return !ext4_inode_is_fast_symlink(inode);
3347 return 0;
3348}
3349
2308/* 3350/*
2309 * ext4_truncate() 3351 * ext4_truncate()
2310 * 3352 *
@@ -2347,51 +3389,25 @@ void ext4_truncate(struct inode *inode)
2347 int n; 3389 int n;
2348 ext4_lblk_t last_block; 3390 ext4_lblk_t last_block;
2349 unsigned blocksize = inode->i_sb->s_blocksize; 3391 unsigned blocksize = inode->i_sb->s_blocksize;
2350 struct page *page;
2351 3392
2352 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 3393 if (!ext4_can_truncate(inode))
2353 S_ISLNK(inode->i_mode)))
2354 return;
2355 if (ext4_inode_is_fast_symlink(inode))
2356 return;
2357 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2358 return; 3394 return;
2359 3395
2360 /*
2361 * We have to lock the EOF page here, because lock_page() nests
2362 * outside jbd2_journal_start().
2363 */
2364 if ((inode->i_size & (blocksize - 1)) == 0) {
2365 /* Block boundary? Nothing to do */
2366 page = NULL;
2367 } else {
2368 page = grab_cache_page(mapping,
2369 inode->i_size >> PAGE_CACHE_SHIFT);
2370 if (!page)
2371 return;
2372 }
2373
2374 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 3396 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
2375 ext4_ext_truncate(inode, page); 3397 ext4_ext_truncate(inode);
2376 return; 3398 return;
2377 } 3399 }
2378 3400
2379 handle = start_transaction(inode); 3401 handle = start_transaction(inode);
2380 if (IS_ERR(handle)) { 3402 if (IS_ERR(handle))
2381 if (page) {
2382 clear_highpage(page);
2383 flush_dcache_page(page);
2384 unlock_page(page);
2385 page_cache_release(page);
2386 }
2387 return; /* AKPM: return what? */ 3403 return; /* AKPM: return what? */
2388 }
2389 3404
2390 last_block = (inode->i_size + blocksize-1) 3405 last_block = (inode->i_size + blocksize-1)
2391 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 3406 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
2392 3407
2393 if (page) 3408 if (inode->i_size & (blocksize - 1))
2394 ext4_block_truncate_page(handle, page, mapping, inode->i_size); 3409 if (ext4_block_truncate_page(handle, mapping, inode->i_size))
3410 goto out_stop;
2395 3411
2396 n = ext4_block_to_path(inode, last_block, offsets, NULL); 3412 n = ext4_block_to_path(inode, last_block, offsets, NULL);
2397 if (n == 0) 3413 if (n == 0)
@@ -2410,6 +3426,11 @@ void ext4_truncate(struct inode *inode)
2410 goto out_stop; 3426 goto out_stop;
2411 3427
2412 /* 3428 /*
3429 * From here we block out all ext4_get_block() callers who want to
3430 * modify the block allocation tree.
3431 */
3432 down_write(&ei->i_data_sem);
3433 /*
2413 * The orphan list entry will now protect us from any crash which 3434 * The orphan list entry will now protect us from any crash which
2414 * occurs before the truncate completes, so it is now safe to propagate 3435 * occurs before the truncate completes, so it is now safe to propagate
2415 * the new, shorter inode size (held for now in i_size) into the 3436 * the new, shorter inode size (held for now in i_size) into the
@@ -2418,12 +3439,6 @@ void ext4_truncate(struct inode *inode)
2418 */ 3439 */
2419 ei->i_disksize = inode->i_size; 3440 ei->i_disksize = inode->i_size;
2420 3441
2421 /*
2422 * From here we block out all ext4_get_block() callers who want to
2423 * modify the block allocation tree.
2424 */
2425 down_write(&ei->i_data_sem);
2426
2427 if (n == 1) { /* direct blocks */ 3442 if (n == 1) { /* direct blocks */
2428 ext4_free_data(handle, inode, NULL, i_data+offsets[0], 3443 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
2429 i_data + EXT4_NDIR_BLOCKS); 3444 i_data + EXT4_NDIR_BLOCKS);
@@ -3107,7 +4122,14 @@ int ext4_write_inode(struct inode *inode, int wait)
3107 * be freed, so we have a strong guarantee that no future commit will 4122 * be freed, so we have a strong guarantee that no future commit will
3108 * leave these blocks visible to the user.) 4123 * leave these blocks visible to the user.)
3109 * 4124 *
3110 * Called with inode->sem down. 4125 * Another thing we have to assure is that if we are in ordered mode
4126 * and inode is still attached to the committing transaction, we must
4127 * we start writeout of all the dirty pages which are being truncated.
4128 * This way we are sure that all the data written in the previous
4129 * transaction are already on disk (truncate waits for pages under
4130 * writeback).
4131 *
4132 * Called with inode->i_mutex down.
3111 */ 4133 */
3112int ext4_setattr(struct dentry *dentry, struct iattr *attr) 4134int ext4_setattr(struct dentry *dentry, struct iattr *attr)
3113{ 4135{
@@ -3173,6 +4195,22 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
3173 if (!error) 4195 if (!error)
3174 error = rc; 4196 error = rc;
3175 ext4_journal_stop(handle); 4197 ext4_journal_stop(handle);
4198
4199 if (ext4_should_order_data(inode)) {
4200 error = ext4_begin_ordered_truncate(inode,
4201 attr->ia_size);
4202 if (error) {
4203 /* Do as much error cleanup as possible */
4204 handle = ext4_journal_start(inode, 3);
4205 if (IS_ERR(handle)) {
4206 ext4_orphan_del(NULL, inode);
4207 goto err_out;
4208 }
4209 ext4_orphan_del(handle, inode);
4210 ext4_journal_stop(handle);
4211 goto err_out;
4212 }
4213 }
3176 } 4214 }
3177 4215
3178 rc = inode_setattr(inode, attr); 4216 rc = inode_setattr(inode, attr);
@@ -3193,6 +4231,32 @@ err_out:
3193 return error; 4231 return error;
3194} 4232}
3195 4233
4234int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
4235 struct kstat *stat)
4236{
4237 struct inode *inode;
4238 unsigned long delalloc_blocks;
4239
4240 inode = dentry->d_inode;
4241 generic_fillattr(inode, stat);
4242
4243 /*
4244 * We can't update i_blocks if the block allocation is delayed
4245 * otherwise in the case of system crash before the real block
4246 * allocation is done, we will have i_blocks inconsistent with
4247 * on-disk file blocks.
4248 * We always keep i_blocks updated together with real
4249 * allocation. But to not confuse with user, stat
4250 * will return the blocks that include the delayed allocation
4251 * blocks for this file.
4252 */
4253 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
4254 delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
4255 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
4256
4257 stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
4258 return 0;
4259}
3196 4260
3197/* 4261/*
3198 * How many blocks doth make a writepage()? 4262 * How many blocks doth make a writepage()?
@@ -3506,3 +4570,64 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
3506 4570
3507 return err; 4571 return err;
3508} 4572}
4573
4574static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
4575{
4576 return !buffer_mapped(bh);
4577}
4578
4579int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4580{
4581 loff_t size;
4582 unsigned long len;
4583 int ret = -EINVAL;
4584 struct file *file = vma->vm_file;
4585 struct inode *inode = file->f_path.dentry->d_inode;
4586 struct address_space *mapping = inode->i_mapping;
4587
4588 /*
4589 * Get i_alloc_sem to stop truncates messing with the inode. We cannot
4590 * get i_mutex because we are already holding mmap_sem.
4591 */
4592 down_read(&inode->i_alloc_sem);
4593 size = i_size_read(inode);
4594 if (page->mapping != mapping || size <= page_offset(page)
4595 || !PageUptodate(page)) {
4596 /* page got truncated from under us? */
4597 goto out_unlock;
4598 }
4599 ret = 0;
4600 if (PageMappedToDisk(page))
4601 goto out_unlock;
4602
4603 if (page->index == size >> PAGE_CACHE_SHIFT)
4604 len = size & ~PAGE_CACHE_MASK;
4605 else
4606 len = PAGE_CACHE_SIZE;
4607
4608 if (page_has_buffers(page)) {
4609 /* return if we have all the buffers mapped */
4610 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
4611 ext4_bh_unmapped))
4612 goto out_unlock;
4613 }
4614 /*
4615 * OK, we need to fill the hole... Do write_begin write_end
4616 * to do block allocation/reservation.We are not holding
4617 * inode.i__mutex here. That allow * parallel write_begin,
4618 * write_end call. lock_page prevent this from happening
4619 * on the same page though
4620 */
4621 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
4622 len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
4623 if (ret < 0)
4624 goto out_unlock;
4625 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
4626 len, len, page, NULL);
4627 if (ret < 0)
4628 goto out_unlock;
4629 ret = 0;
4630out_unlock:
4631 up_read(&inode->i_alloc_sem);
4632 return ret;
4633}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c9900aade150..8d141a25bbee 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -381,22 +381,28 @@ static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
381 381
382static inline int mb_find_next_zero_bit(void *addr, int max, int start) 382static inline int mb_find_next_zero_bit(void *addr, int max, int start)
383{ 383{
384 int fix = 0; 384 int fix = 0, ret, tmpmax;
385 addr = mb_correct_addr_and_bit(&fix, addr); 385 addr = mb_correct_addr_and_bit(&fix, addr);
386 max += fix; 386 tmpmax = max + fix;
387 start += fix; 387 start += fix;
388 388
389 return ext4_find_next_zero_bit(addr, max, start) - fix; 389 ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
390 if (ret > max)
391 return max;
392 return ret;
390} 393}
391 394
392static inline int mb_find_next_bit(void *addr, int max, int start) 395static inline int mb_find_next_bit(void *addr, int max, int start)
393{ 396{
394 int fix = 0; 397 int fix = 0, ret, tmpmax;
395 addr = mb_correct_addr_and_bit(&fix, addr); 398 addr = mb_correct_addr_and_bit(&fix, addr);
396 max += fix; 399 tmpmax = max + fix;
397 start += fix; 400 start += fix;
398 401
399 return ext4_find_next_bit(addr, max, start) - fix; 402 ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
403 if (ret > max)
404 return max;
405 return ret;
400} 406}
401 407
402static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) 408static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
@@ -803,6 +809,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
803 if (!buffer_uptodate(bh[i])) 809 if (!buffer_uptodate(bh[i]))
804 goto out; 810 goto out;
805 811
812 err = 0;
806 first_block = page->index * blocks_per_page; 813 first_block = page->index * blocks_per_page;
807 for (i = 0; i < blocks_per_page; i++) { 814 for (i = 0; i < blocks_per_page; i++) {
808 int group; 815 int group;
@@ -883,6 +890,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
883 int pnum; 890 int pnum;
884 int poff; 891 int poff;
885 struct page *page; 892 struct page *page;
893 int ret;
886 894
887 mb_debug("load group %lu\n", group); 895 mb_debug("load group %lu\n", group);
888 896
@@ -914,15 +922,21 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
914 if (page) { 922 if (page) {
915 BUG_ON(page->mapping != inode->i_mapping); 923 BUG_ON(page->mapping != inode->i_mapping);
916 if (!PageUptodate(page)) { 924 if (!PageUptodate(page)) {
917 ext4_mb_init_cache(page, NULL); 925 ret = ext4_mb_init_cache(page, NULL);
926 if (ret) {
927 unlock_page(page);
928 goto err;
929 }
918 mb_cmp_bitmaps(e4b, page_address(page) + 930 mb_cmp_bitmaps(e4b, page_address(page) +
919 (poff * sb->s_blocksize)); 931 (poff * sb->s_blocksize));
920 } 932 }
921 unlock_page(page); 933 unlock_page(page);
922 } 934 }
923 } 935 }
924 if (page == NULL || !PageUptodate(page)) 936 if (page == NULL || !PageUptodate(page)) {
937 ret = -EIO;
925 goto err; 938 goto err;
939 }
926 e4b->bd_bitmap_page = page; 940 e4b->bd_bitmap_page = page;
927 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); 941 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
928 mark_page_accessed(page); 942 mark_page_accessed(page);
@@ -938,14 +952,20 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
938 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); 952 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
939 if (page) { 953 if (page) {
940 BUG_ON(page->mapping != inode->i_mapping); 954 BUG_ON(page->mapping != inode->i_mapping);
941 if (!PageUptodate(page)) 955 if (!PageUptodate(page)) {
942 ext4_mb_init_cache(page, e4b->bd_bitmap); 956 ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
943 957 if (ret) {
958 unlock_page(page);
959 goto err;
960 }
961 }
944 unlock_page(page); 962 unlock_page(page);
945 } 963 }
946 } 964 }
947 if (page == NULL || !PageUptodate(page)) 965 if (page == NULL || !PageUptodate(page)) {
966 ret = -EIO;
948 goto err; 967 goto err;
968 }
949 e4b->bd_buddy_page = page; 969 e4b->bd_buddy_page = page;
950 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); 970 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
951 mark_page_accessed(page); 971 mark_page_accessed(page);
@@ -962,7 +982,7 @@ err:
962 page_cache_release(e4b->bd_buddy_page); 982 page_cache_release(e4b->bd_buddy_page);
963 e4b->bd_buddy = NULL; 983 e4b->bd_buddy = NULL;
964 e4b->bd_bitmap = NULL; 984 e4b->bd_bitmap = NULL;
965 return -EIO; 985 return ret;
966} 986}
967 987
968static void ext4_mb_release_desc(struct ext4_buddy *e4b) 988static void ext4_mb_release_desc(struct ext4_buddy *e4b)
@@ -1031,7 +1051,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
1031 } 1051 }
1032} 1052}
1033 1053
1034static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, 1054static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1035 int first, int count) 1055 int first, int count)
1036{ 1056{
1037 int block = 0; 1057 int block = 0;
@@ -1071,11 +1091,12 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1071 blocknr += block; 1091 blocknr += block;
1072 blocknr += 1092 blocknr +=
1073 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1093 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1074 1094 ext4_unlock_group(sb, e4b->bd_group);
1075 ext4_error(sb, __func__, "double-free of inode" 1095 ext4_error(sb, __func__, "double-free of inode"
1076 " %lu's block %llu(bit %u in group %lu)\n", 1096 " %lu's block %llu(bit %u in group %lu)\n",
1077 inode ? inode->i_ino : 0, blocknr, block, 1097 inode ? inode->i_ino : 0, blocknr, block,
1078 e4b->bd_group); 1098 e4b->bd_group);
1099 ext4_lock_group(sb, e4b->bd_group);
1079 } 1100 }
1080 mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); 1101 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
1081 e4b->bd_info->bb_counters[order]++; 1102 e4b->bd_info->bb_counters[order]++;
@@ -1113,8 +1134,6 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1113 } while (1); 1134 } while (1);
1114 } 1135 }
1115 mb_check_buddy(e4b); 1136 mb_check_buddy(e4b);
1116
1117 return 0;
1118} 1137}
1119 1138
1120static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, 1139static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
@@ -1730,10 +1749,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1730 ac->ac_g_ex.fe_start = sbi->s_mb_last_start; 1749 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
1731 spin_unlock(&sbi->s_md_lock); 1750 spin_unlock(&sbi->s_md_lock);
1732 } 1751 }
1733
1734 /* searching for the right group start from the goal value specified */
1735 group = ac->ac_g_ex.fe_group;
1736
1737 /* Let's just scan groups to find more-less suitable blocks */ 1752 /* Let's just scan groups to find more-less suitable blocks */
1738 cr = ac->ac_2order ? 0 : 1; 1753 cr = ac->ac_2order ? 0 : 1;
1739 /* 1754 /*
@@ -1743,6 +1758,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1743repeat: 1758repeat:
1744 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) { 1759 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
1745 ac->ac_criteria = cr; 1760 ac->ac_criteria = cr;
1761 /*
1762 * searching for the right group start
1763 * from the goal value specified
1764 */
1765 group = ac->ac_g_ex.fe_group;
1766
1746 for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { 1767 for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
1747 struct ext4_group_info *grp; 1768 struct ext4_group_info *grp;
1748 struct ext4_group_desc *desc; 1769 struct ext4_group_desc *desc;
@@ -1963,6 +1984,8 @@ static int ext4_mb_seq_history_open(struct inode *inode, struct file *file)
1963 int rc; 1984 int rc;
1964 int size; 1985 int size;
1965 1986
1987 if (unlikely(sbi->s_mb_history == NULL))
1988 return -ENOMEM;
1966 s = kmalloc(sizeof(*s), GFP_KERNEL); 1989 s = kmalloc(sizeof(*s), GFP_KERNEL);
1967 if (s == NULL) 1990 if (s == NULL)
1968 return -ENOMEM; 1991 return -ENOMEM;
@@ -2165,9 +2188,7 @@ static void ext4_mb_history_init(struct super_block *sb)
2165 sbi->s_mb_history_cur = 0; 2188 sbi->s_mb_history_cur = 0;
2166 spin_lock_init(&sbi->s_mb_history_lock); 2189 spin_lock_init(&sbi->s_mb_history_lock);
2167 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); 2190 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
2168 sbi->s_mb_history = kmalloc(i, GFP_KERNEL); 2191 sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
2169 if (likely(sbi->s_mb_history != NULL))
2170 memset(sbi->s_mb_history, 0, i);
2171 /* if we can't allocate history, then we simple won't use it */ 2192 /* if we can't allocate history, then we simple won't use it */
2172} 2193}
2173 2194
@@ -2215,21 +2236,192 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
2215#define ext4_mb_history_init(sb) 2236#define ext4_mb_history_init(sb)
2216#endif 2237#endif
2217 2238
2239
2240/* Create and initialize ext4_group_info data for the given group. */
2241int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2242 struct ext4_group_desc *desc)
2243{
2244 int i, len;
2245 int metalen = 0;
2246 struct ext4_sb_info *sbi = EXT4_SB(sb);
2247 struct ext4_group_info **meta_group_info;
2248
2249 /*
2250 * First check if this group is the first of a reserved block.
2251 * If it's true, we have to allocate a new table of pointers
2252 * to ext4_group_info structures
2253 */
2254 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2255 metalen = sizeof(*meta_group_info) <<
2256 EXT4_DESC_PER_BLOCK_BITS(sb);
2257 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2258 if (meta_group_info == NULL) {
2259 printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
2260 "buddy group\n");
2261 goto exit_meta_group_info;
2262 }
2263 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2264 meta_group_info;
2265 }
2266
2267 /*
2268 * calculate needed size. if change bb_counters size,
2269 * don't forget about ext4_mb_generate_buddy()
2270 */
2271 len = offsetof(typeof(**meta_group_info),
2272 bb_counters[sb->s_blocksize_bits + 2]);
2273
2274 meta_group_info =
2275 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2276 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2277
2278 meta_group_info[i] = kzalloc(len, GFP_KERNEL);
2279 if (meta_group_info[i] == NULL) {
2280 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2281 goto exit_group_info;
2282 }
2283 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2284 &(meta_group_info[i]->bb_state));
2285
2286 /*
2287 * initialize bb_free to be able to skip
2288 * empty groups without initialization
2289 */
2290 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2291 meta_group_info[i]->bb_free =
2292 ext4_free_blocks_after_init(sb, group, desc);
2293 } else {
2294 meta_group_info[i]->bb_free =
2295 le16_to_cpu(desc->bg_free_blocks_count);
2296 }
2297
2298 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2299
2300#ifdef DOUBLE_CHECK
2301 {
2302 struct buffer_head *bh;
2303 meta_group_info[i]->bb_bitmap =
2304 kmalloc(sb->s_blocksize, GFP_KERNEL);
2305 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2306 bh = ext4_read_block_bitmap(sb, group);
2307 BUG_ON(bh == NULL);
2308 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2309 sb->s_blocksize);
2310 put_bh(bh);
2311 }
2312#endif
2313
2314 return 0;
2315
2316exit_group_info:
2317 /* If a meta_group_info table has been allocated, release it now */
2318 if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
2319 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2320exit_meta_group_info:
2321 return -ENOMEM;
2322} /* ext4_mb_add_groupinfo */
2323
2324/*
2325 * Add a group to the existing groups.
2326 * This function is used for online resize
2327 */
2328int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
2329 struct ext4_group_desc *desc)
2330{
2331 struct ext4_sb_info *sbi = EXT4_SB(sb);
2332 struct inode *inode = sbi->s_buddy_cache;
2333 int blocks_per_page;
2334 int block;
2335 int pnum;
2336 struct page *page;
2337 int err;
2338
2339 /* Add group based on group descriptor*/
2340 err = ext4_mb_add_groupinfo(sb, group, desc);
2341 if (err)
2342 return err;
2343
2344 /*
2345 * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
2346 * datas) are set not up to date so that they will be re-initilaized
2347 * during the next call to ext4_mb_load_buddy
2348 */
2349
2350 /* Set buddy page as not up to date */
2351 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
2352 block = group * 2;
2353 pnum = block / blocks_per_page;
2354 page = find_get_page(inode->i_mapping, pnum);
2355 if (page != NULL) {
2356 ClearPageUptodate(page);
2357 page_cache_release(page);
2358 }
2359
2360 /* Set bitmap page as not up to date */
2361 block++;
2362 pnum = block / blocks_per_page;
2363 page = find_get_page(inode->i_mapping, pnum);
2364 if (page != NULL) {
2365 ClearPageUptodate(page);
2366 page_cache_release(page);
2367 }
2368
2369 return 0;
2370}
2371
2372/*
2373 * Update an existing group.
2374 * This function is used for online resize
2375 */
2376void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
2377{
2378 grp->bb_free += add;
2379}
2380
2218static int ext4_mb_init_backend(struct super_block *sb) 2381static int ext4_mb_init_backend(struct super_block *sb)
2219{ 2382{
2220 ext4_group_t i; 2383 ext4_group_t i;
2221 int j, len, metalen; 2384 int metalen;
2222 struct ext4_sb_info *sbi = EXT4_SB(sb); 2385 struct ext4_sb_info *sbi = EXT4_SB(sb);
2223 int num_meta_group_infos = 2386 struct ext4_super_block *es = sbi->s_es;
2224 (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) >> 2387 int num_meta_group_infos;
2225 EXT4_DESC_PER_BLOCK_BITS(sb); 2388 int num_meta_group_infos_max;
2389 int array_size;
2226 struct ext4_group_info **meta_group_info; 2390 struct ext4_group_info **meta_group_info;
2391 struct ext4_group_desc *desc;
2392
2393 /* This is the number of blocks used by GDT */
2394 num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
2395 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2396
2397 /*
2398 * This is the total number of blocks used by GDT including
2399 * the number of reserved blocks for GDT.
2400 * The s_group_info array is allocated with this value
2401 * to allow a clean online resize without a complex
2402 * manipulation of pointer.
2403 * The drawback is the unused memory when no resize
2404 * occurs but it's very low in terms of pages
2405 * (see comments below)
2406 * Need to handle this properly when META_BG resizing is allowed
2407 */
2408 num_meta_group_infos_max = num_meta_group_infos +
2409 le16_to_cpu(es->s_reserved_gdt_blocks);
2227 2410
2411 /*
2412 * array_size is the size of s_group_info array. We round it
2413 * to the next power of two because this approximation is done
2414 * internally by kmalloc so we can have some more memory
2415 * for free here (e.g. may be used for META_BG resize).
2416 */
2417 array_size = 1;
2418 while (array_size < sizeof(*sbi->s_group_info) *
2419 num_meta_group_infos_max)
2420 array_size = array_size << 1;
2228 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte 2421 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2229 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. 2422 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2230 * So a two level scheme suffices for now. */ 2423 * So a two level scheme suffices for now. */
2231 sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) * 2424 sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
2232 num_meta_group_infos, GFP_KERNEL);
2233 if (sbi->s_group_info == NULL) { 2425 if (sbi->s_group_info == NULL) {
2234 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); 2426 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
2235 return -ENOMEM; 2427 return -ENOMEM;
@@ -2256,63 +2448,15 @@ static int ext4_mb_init_backend(struct super_block *sb)
2256 sbi->s_group_info[i] = meta_group_info; 2448 sbi->s_group_info[i] = meta_group_info;
2257 } 2449 }
2258 2450
2259 /*
2260 * calculate needed size. if change bb_counters size,
2261 * don't forget about ext4_mb_generate_buddy()
2262 */
2263 len = sizeof(struct ext4_group_info);
2264 len += sizeof(unsigned short) * (sb->s_blocksize_bits + 2);
2265 for (i = 0; i < sbi->s_groups_count; i++) { 2451 for (i = 0; i < sbi->s_groups_count; i++) {
2266 struct ext4_group_desc *desc;
2267
2268 meta_group_info =
2269 sbi->s_group_info[i >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2270 j = i & (EXT4_DESC_PER_BLOCK(sb) - 1);
2271
2272 meta_group_info[j] = kzalloc(len, GFP_KERNEL);
2273 if (meta_group_info[j] == NULL) {
2274 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2275 goto err_freebuddy;
2276 }
2277 desc = ext4_get_group_desc(sb, i, NULL); 2452 desc = ext4_get_group_desc(sb, i, NULL);
2278 if (desc == NULL) { 2453 if (desc == NULL) {
2279 printk(KERN_ERR 2454 printk(KERN_ERR
2280 "EXT4-fs: can't read descriptor %lu\n", i); 2455 "EXT4-fs: can't read descriptor %lu\n", i);
2281 i++;
2282 goto err_freebuddy; 2456 goto err_freebuddy;
2283 } 2457 }
2284 memset(meta_group_info[j], 0, len); 2458 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2285 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, 2459 goto err_freebuddy;
2286 &(meta_group_info[j]->bb_state));
2287
2288 /*
2289 * initialize bb_free to be able to skip
2290 * empty groups without initialization
2291 */
2292 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2293 meta_group_info[j]->bb_free =
2294 ext4_free_blocks_after_init(sb, i, desc);
2295 } else {
2296 meta_group_info[j]->bb_free =
2297 le16_to_cpu(desc->bg_free_blocks_count);
2298 }
2299
2300 INIT_LIST_HEAD(&meta_group_info[j]->bb_prealloc_list);
2301
2302#ifdef DOUBLE_CHECK
2303 {
2304 struct buffer_head *bh;
2305 meta_group_info[j]->bb_bitmap =
2306 kmalloc(sb->s_blocksize, GFP_KERNEL);
2307 BUG_ON(meta_group_info[j]->bb_bitmap == NULL);
2308 bh = read_block_bitmap(sb, i);
2309 BUG_ON(bh == NULL);
2310 memcpy(meta_group_info[j]->bb_bitmap, bh->b_data,
2311 sb->s_blocksize);
2312 put_bh(bh);
2313 }
2314#endif
2315
2316 } 2460 }
2317 2461
2318 return 0; 2462 return 0;
@@ -2336,6 +2480,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2336 unsigned i; 2480 unsigned i;
2337 unsigned offset; 2481 unsigned offset;
2338 unsigned max; 2482 unsigned max;
2483 int ret;
2339 2484
2340 if (!test_opt(sb, MBALLOC)) 2485 if (!test_opt(sb, MBALLOC))
2341 return 0; 2486 return 0;
@@ -2370,12 +2515,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2370 } while (i <= sb->s_blocksize_bits + 1); 2515 } while (i <= sb->s_blocksize_bits + 1);
2371 2516
2372 /* init file for buddy data */ 2517 /* init file for buddy data */
2373 i = ext4_mb_init_backend(sb); 2518 ret = ext4_mb_init_backend(sb);
2374 if (i) { 2519 if (ret != 0) {
2375 clear_opt(sbi->s_mount_opt, MBALLOC); 2520 clear_opt(sbi->s_mount_opt, MBALLOC);
2376 kfree(sbi->s_mb_offsets); 2521 kfree(sbi->s_mb_offsets);
2377 kfree(sbi->s_mb_maxs); 2522 kfree(sbi->s_mb_maxs);
2378 return i; 2523 return ret;
2379 } 2524 }
2380 2525
2381 spin_lock_init(&sbi->s_md_lock); 2526 spin_lock_init(&sbi->s_md_lock);
@@ -2548,8 +2693,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
2548 ext4_lock_group(sb, md->group); 2693 ext4_lock_group(sb, md->group);
2549 for (i = 0; i < md->num; i++) { 2694 for (i = 0; i < md->num; i++) {
2550 mb_debug(" %u", md->blocks[i]); 2695 mb_debug(" %u", md->blocks[i]);
2551 err = mb_free_blocks(NULL, &e4b, md->blocks[i], 1); 2696 mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
2552 BUG_ON(err != 0);
2553 } 2697 }
2554 mb_debug("\n"); 2698 mb_debug("\n");
2555 ext4_unlock_group(sb, md->group); 2699 ext4_unlock_group(sb, md->group);
@@ -2575,25 +2719,24 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
2575 2719
2576 2720
2577 2721
2578#define MB_PROC_VALUE_READ(name) \ 2722#define MB_PROC_FOPS(name) \
2579static int ext4_mb_read_##name(char *page, char **start, \ 2723static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \
2580 off_t off, int count, int *eof, void *data) \
2581{ \ 2724{ \
2582 struct ext4_sb_info *sbi = data; \ 2725 struct ext4_sb_info *sbi = m->private; \
2583 int len; \ 2726 \
2584 *eof = 1; \ 2727 seq_printf(m, "%ld\n", sbi->s_mb_##name); \
2585 if (off != 0) \ 2728 return 0; \
2586 return 0; \ 2729} \
2587 len = sprintf(page, "%ld\n", sbi->s_mb_##name); \ 2730 \
2588 *start = page; \ 2731static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
2589 return len; \ 2732{ \
2590} 2733 return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
2591 2734} \
2592#define MB_PROC_VALUE_WRITE(name) \ 2735 \
2593static int ext4_mb_write_##name(struct file *file, \ 2736static ssize_t ext4_mb_##name##_proc_write(struct file *file, \
2594 const char __user *buf, unsigned long cnt, void *data) \ 2737 const char __user *buf, size_t cnt, loff_t *ppos) \
2595{ \ 2738{ \
2596 struct ext4_sb_info *sbi = data; \ 2739 struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
2597 char str[32]; \ 2740 char str[32]; \
2598 long value; \ 2741 long value; \
2599 if (cnt >= sizeof(str)) \ 2742 if (cnt >= sizeof(str)) \
@@ -2605,31 +2748,32 @@ static int ext4_mb_write_##name(struct file *file, \
2605 return -ERANGE; \ 2748 return -ERANGE; \
2606 sbi->s_mb_##name = value; \ 2749 sbi->s_mb_##name = value; \
2607 return cnt; \ 2750 return cnt; \
2608} 2751} \
2752 \
2753static const struct file_operations ext4_mb_##name##_proc_fops = { \
2754 .owner = THIS_MODULE, \
2755 .open = ext4_mb_##name##_proc_open, \
2756 .read = seq_read, \
2757 .llseek = seq_lseek, \
2758 .release = single_release, \
2759 .write = ext4_mb_##name##_proc_write, \
2760};
2609 2761
2610MB_PROC_VALUE_READ(stats); 2762MB_PROC_FOPS(stats);
2611MB_PROC_VALUE_WRITE(stats); 2763MB_PROC_FOPS(max_to_scan);
2612MB_PROC_VALUE_READ(max_to_scan); 2764MB_PROC_FOPS(min_to_scan);
2613MB_PROC_VALUE_WRITE(max_to_scan); 2765MB_PROC_FOPS(order2_reqs);
2614MB_PROC_VALUE_READ(min_to_scan); 2766MB_PROC_FOPS(stream_request);
2615MB_PROC_VALUE_WRITE(min_to_scan); 2767MB_PROC_FOPS(group_prealloc);
2616MB_PROC_VALUE_READ(order2_reqs);
2617MB_PROC_VALUE_WRITE(order2_reqs);
2618MB_PROC_VALUE_READ(stream_request);
2619MB_PROC_VALUE_WRITE(stream_request);
2620MB_PROC_VALUE_READ(group_prealloc);
2621MB_PROC_VALUE_WRITE(group_prealloc);
2622 2768
2623#define MB_PROC_HANDLER(name, var) \ 2769#define MB_PROC_HANDLER(name, var) \
2624do { \ 2770do { \
2625 proc = create_proc_entry(name, mode, sbi->s_mb_proc); \ 2771 proc = proc_create_data(name, mode, sbi->s_mb_proc, \
2772 &ext4_mb_##var##_proc_fops, sbi); \
2626 if (proc == NULL) { \ 2773 if (proc == NULL) { \
2627 printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ 2774 printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
2628 goto err_out; \ 2775 goto err_out; \
2629 } \ 2776 } \
2630 proc->data = sbi; \
2631 proc->read_proc = ext4_mb_read_##var ; \
2632 proc->write_proc = ext4_mb_write_##var; \
2633} while (0) 2777} while (0)
2634 2778
2635static int ext4_mb_init_per_dev_proc(struct super_block *sb) 2779static int ext4_mb_init_per_dev_proc(struct super_block *sb)
@@ -2639,6 +2783,10 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2639 struct proc_dir_entry *proc; 2783 struct proc_dir_entry *proc;
2640 char devname[64]; 2784 char devname[64];
2641 2785
2786 if (proc_root_ext4 == NULL) {
2787 sbi->s_mb_proc = NULL;
2788 return -EINVAL;
2789 }
2642 bdevname(sb->s_bdev, devname); 2790 bdevname(sb->s_bdev, devname);
2643 sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); 2791 sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
2644 2792
@@ -2747,7 +2895,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2747 2895
2748 2896
2749 err = -EIO; 2897 err = -EIO;
2750 bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); 2898 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2751 if (!bitmap_bh) 2899 if (!bitmap_bh)
2752 goto out_err; 2900 goto out_err;
2753 2901
@@ -2816,7 +2964,23 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2816 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); 2964 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
2817 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2965 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2818 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2966 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
2819 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2967
2968 /*
2969 * free blocks account has already be reduced/reserved
2970 * at write_begin() time for delayed allocation
2971 * do not double accounting
2972 */
2973 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2974 percpu_counter_sub(&sbi->s_freeblocks_counter,
2975 ac->ac_b_ex.fe_len);
2976
2977 if (sbi->s_log_groups_per_flex) {
2978 ext4_group_t flex_group = ext4_flex_group(sbi,
2979 ac->ac_b_ex.fe_group);
2980 spin_lock(sb_bgl_lock(sbi, flex_group));
2981 sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len;
2982 spin_unlock(sb_bgl_lock(sbi, flex_group));
2983 }
2820 2984
2821 err = ext4_journal_dirty_metadata(handle, bitmap_bh); 2985 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
2822 if (err) 2986 if (err)
@@ -3473,8 +3637,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3473 if (bit >= end) 3637 if (bit >= end)
3474 break; 3638 break;
3475 next = mb_find_next_bit(bitmap_bh->b_data, end, bit); 3639 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3476 if (next > end)
3477 next = end;
3478 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + 3640 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
3479 le32_to_cpu(sbi->s_es->s_first_data_block); 3641 le32_to_cpu(sbi->s_es->s_first_data_block);
3480 mb_debug(" free preallocated %u/%u in group %u\n", 3642 mb_debug(" free preallocated %u/%u in group %u\n",
@@ -3569,7 +3731,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3569 if (list_empty(&grp->bb_prealloc_list)) 3731 if (list_empty(&grp->bb_prealloc_list))
3570 return 0; 3732 return 0;
3571 3733
3572 bitmap_bh = read_block_bitmap(sb, group); 3734 bitmap_bh = ext4_read_block_bitmap(sb, group);
3573 if (bitmap_bh == NULL) { 3735 if (bitmap_bh == NULL) {
3574 /* error handling here */ 3736 /* error handling here */
3575 ext4_mb_release_desc(&e4b); 3737 ext4_mb_release_desc(&e4b);
@@ -3743,7 +3905,7 @@ repeat:
3743 err = ext4_mb_load_buddy(sb, group, &e4b); 3905 err = ext4_mb_load_buddy(sb, group, &e4b);
3744 BUG_ON(err != 0); /* error handling here */ 3906 BUG_ON(err != 0); /* error handling here */
3745 3907
3746 bitmap_bh = read_block_bitmap(sb, group); 3908 bitmap_bh = ext4_read_block_bitmap(sb, group);
3747 if (bitmap_bh == NULL) { 3909 if (bitmap_bh == NULL) {
3748 /* error handling here */ 3910 /* error handling here */
3749 ext4_mb_release_desc(&e4b); 3911 ext4_mb_release_desc(&e4b);
@@ -4011,10 +4173,21 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4011 sbi = EXT4_SB(sb); 4173 sbi = EXT4_SB(sb);
4012 4174
4013 if (!test_opt(sb, MBALLOC)) { 4175 if (!test_opt(sb, MBALLOC)) {
4014 block = ext4_new_blocks_old(handle, ar->inode, ar->goal, 4176 block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
4015 &(ar->len), errp); 4177 &(ar->len), errp);
4016 return block; 4178 return block;
4017 } 4179 }
4180 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
4181 /*
4182 * With delalloc we already reserved the blocks
4183 */
4184 ar->len = ext4_has_free_blocks(sbi, ar->len);
4185 }
4186
4187 if (ar->len == 0) {
4188 *errp = -ENOSPC;
4189 return 0;
4190 }
4018 4191
4019 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { 4192 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
4020 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4193 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
@@ -4026,10 +4199,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4026 } 4199 }
4027 inquota = ar->len; 4200 inquota = ar->len;
4028 4201
4202 if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
4203 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
4204
4029 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4205 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4030 if (!ac) { 4206 if (!ac) {
4207 ar->len = 0;
4031 *errp = -ENOMEM; 4208 *errp = -ENOMEM;
4032 return 0; 4209 goto out1;
4033 } 4210 }
4034 4211
4035 ext4_mb_poll_new_transaction(sb, handle); 4212 ext4_mb_poll_new_transaction(sb, handle);
@@ -4037,12 +4214,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4037 *errp = ext4_mb_initialize_context(ac, ar); 4214 *errp = ext4_mb_initialize_context(ac, ar);
4038 if (*errp) { 4215 if (*errp) {
4039 ar->len = 0; 4216 ar->len = 0;
4040 goto out; 4217 goto out2;
4041 } 4218 }
4042 4219
4043 ac->ac_op = EXT4_MB_HISTORY_PREALLOC; 4220 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4044 if (!ext4_mb_use_preallocated(ac)) { 4221 if (!ext4_mb_use_preallocated(ac)) {
4045
4046 ac->ac_op = EXT4_MB_HISTORY_ALLOC; 4222 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4047 ext4_mb_normalize_request(ac, ar); 4223 ext4_mb_normalize_request(ac, ar);
4048repeat: 4224repeat:
@@ -4085,11 +4261,12 @@ repeat:
4085 4261
4086 ext4_mb_release_context(ac); 4262 ext4_mb_release_context(ac);
4087 4263
4088out: 4264out2:
4265 kmem_cache_free(ext4_ac_cachep, ac);
4266out1:
4089 if (ar->len < inquota) 4267 if (ar->len < inquota)
4090 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); 4268 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
4091 4269
4092 kmem_cache_free(ext4_ac_cachep, ac);
4093 return block; 4270 return block;
4094} 4271}
4095static void ext4_mb_poll_new_transaction(struct super_block *sb, 4272static void ext4_mb_poll_new_transaction(struct super_block *sb,
@@ -4242,7 +4419,7 @@ do_more:
4242 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); 4419 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
4243 count -= overflow; 4420 count -= overflow;
4244 } 4421 }
4245 bitmap_bh = read_block_bitmap(sb, block_group); 4422 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4246 if (!bitmap_bh) 4423 if (!bitmap_bh)
4247 goto error_return; 4424 goto error_return;
4248 gdp = ext4_get_group_desc(sb, block_group, &gd_bh); 4425 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
@@ -4309,10 +4486,9 @@ do_more:
4309 ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); 4486 ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
4310 } else { 4487 } else {
4311 ext4_lock_group(sb, block_group); 4488 ext4_lock_group(sb, block_group);
4312 err = mb_free_blocks(inode, &e4b, bit, count); 4489 mb_free_blocks(inode, &e4b, bit, count);
4313 ext4_mb_return_to_preallocation(inode, &e4b, block, count); 4490 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4314 ext4_unlock_group(sb, block_group); 4491 ext4_unlock_group(sb, block_group);
4315 BUG_ON(err != 0);
4316 } 4492 }
4317 4493
4318 spin_lock(sb_bgl_lock(sbi, block_group)); 4494 spin_lock(sb_bgl_lock(sbi, block_group));
@@ -4321,6 +4497,13 @@ do_more:
4321 spin_unlock(sb_bgl_lock(sbi, block_group)); 4497 spin_unlock(sb_bgl_lock(sbi, block_group));
4322 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4498 percpu_counter_add(&sbi->s_freeblocks_counter, count);
4323 4499
4500 if (sbi->s_log_groups_per_flex) {
4501 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4502 spin_lock(sb_bgl_lock(sbi, flex_group));
4503 sbi->s_flex_groups[flex_group].free_blocks += count;
4504 spin_unlock(sb_bgl_lock(sbi, flex_group));
4505 }
4506
4324 ext4_mb_release_desc(&e4b); 4507 ext4_mb_release_desc(&e4b);
4325 4508
4326 *freed += count; 4509 *freed += count;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index ab16beaa830d..387ad98350c3 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -183,6 +183,16 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
183 struct inode *inode); 183 struct inode *inode);
184 184
185/* 185/*
186 * p is at least 6 bytes before the end of page
187 */
188static inline struct ext4_dir_entry_2 *
189ext4_next_entry(struct ext4_dir_entry_2 *p)
190{
191 return (struct ext4_dir_entry_2 *)((char *)p +
192 ext4_rec_len_from_disk(p->rec_len));
193}
194
195/*
186 * Future: use high four bits of block for coalesce-on-delete flags 196 * Future: use high four bits of block for coalesce-on-delete flags
187 * Mask them off for now. 197 * Mask them off for now.
188 */ 198 */
@@ -231,13 +241,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
231{ 241{
232 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - 242 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
233 EXT4_DIR_REC_LEN(2) - infosize; 243 EXT4_DIR_REC_LEN(2) - infosize;
234 return 0? 20: entry_space / sizeof(struct dx_entry); 244 return entry_space / sizeof(struct dx_entry);
235} 245}
236 246
237static inline unsigned dx_node_limit (struct inode *dir) 247static inline unsigned dx_node_limit (struct inode *dir)
238{ 248{
239 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); 249 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
240 return 0? 22: entry_space / sizeof(struct dx_entry); 250 return entry_space / sizeof(struct dx_entry);
241} 251}
242 252
243/* 253/*
@@ -554,15 +564,6 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
554 564
555 565
556/* 566/*
557 * p is at least 6 bytes before the end of page
558 */
559static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p)
560{
561 return (struct ext4_dir_entry_2 *)((char *)p +
562 ext4_rec_len_from_disk(p->rec_len));
563}
564
565/*
566 * This function fills a red-black tree with information from a 567 * This function fills a red-black tree with information from a
567 * directory block. It returns the number directory entries loaded 568 * directory block. It returns the number directory entries loaded
568 * into the tree. If there is an error it is returned in err. 569 * into the tree. If there is an error it is returned in err.
@@ -993,19 +994,21 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
993 de = (struct ext4_dir_entry_2 *) bh->b_data; 994 de = (struct ext4_dir_entry_2 *) bh->b_data;
994 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - 995 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
995 EXT4_DIR_REC_LEN(0)); 996 EXT4_DIR_REC_LEN(0));
996 for (; de < top; de = ext4_next_entry(de)) 997 for (; de < top; de = ext4_next_entry(de)) {
997 if (ext4_match (namelen, name, de)) { 998 int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
998 if (!ext4_check_dir_entry("ext4_find_entry", 999 + ((char *) de - bh->b_data);
999 dir, de, bh, 1000
1000 (block<<EXT4_BLOCK_SIZE_BITS(sb)) 1001 if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) {
1001 +((char *)de - bh->b_data))) { 1002 brelse(bh);
1002 brelse (bh);
1003 *err = ERR_BAD_DX_DIR; 1003 *err = ERR_BAD_DX_DIR;
1004 goto errout; 1004 goto errout;
1005 } 1005 }
1006 *res_dir = de; 1006
1007 dx_release (frames); 1007 if (ext4_match(namelen, name, de)) {
1008 return bh; 1008 *res_dir = de;
1009 dx_release(frames);
1010 return bh;
1011 }
1009 } 1012 }
1010 brelse (bh); 1013 brelse (bh);
1011 /* Check to see if we should continue to search */ 1014 /* Check to see if we should continue to search */
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9ff7b1c04239..f000fbe2cd93 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -866,6 +866,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
866 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 866 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
867 867
868 /* 868 /*
869 * We can allocate memory for mb_alloc based on the new group
870 * descriptor
871 */
872 if (test_opt(sb, MBALLOC)) {
873 err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
874 if (err)
875 goto exit_journal;
876 }
877 /*
869 * Make the new blocks and inodes valid next. We do this before 878 * Make the new blocks and inodes valid next. We do this before
870 * increasing the group count so that once the group is enabled, 879 * increasing the group count so that once the group is enabled,
871 * all of its blocks and inodes are already valid. 880 * all of its blocks and inodes are already valid.
@@ -957,6 +966,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
957 handle_t *handle; 966 handle_t *handle;
958 int err; 967 int err;
959 unsigned long freed_blocks; 968 unsigned long freed_blocks;
969 ext4_group_t group;
970 struct ext4_group_info *grp;
960 971
961 /* We don't need to worry about locking wrt other resizers just 972 /* We don't need to worry about locking wrt other resizers just
962 * yet: we're going to revalidate es->s_blocks_count after 973 * yet: we're going to revalidate es->s_blocks_count after
@@ -988,7 +999,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
988 } 999 }
989 1000
990 /* Handle the remaining blocks in the last group only. */ 1001 /* Handle the remaining blocks in the last group only. */
991 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); 1002 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
992 1003
993 if (last == 0) { 1004 if (last == 0) {
994 ext4_warning(sb, __func__, 1005 ext4_warning(sb, __func__,
@@ -1060,6 +1071,45 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1060 o_blocks_count + add); 1071 o_blocks_count + add);
1061 if ((err = ext4_journal_stop(handle))) 1072 if ((err = ext4_journal_stop(handle)))
1062 goto exit_put; 1073 goto exit_put;
1074
1075 /*
1076 * Mark mballoc pages as not up to date so that they will be updated
1077 * next time they are loaded by ext4_mb_load_buddy.
1078 */
1079 if (test_opt(sb, MBALLOC)) {
1080 struct ext4_sb_info *sbi = EXT4_SB(sb);
1081 struct inode *inode = sbi->s_buddy_cache;
1082 int blocks_per_page;
1083 int block;
1084 int pnum;
1085 struct page *page;
1086
1087 /* Set buddy page as not up to date */
1088 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1089 block = group * 2;
1090 pnum = block / blocks_per_page;
1091 page = find_get_page(inode->i_mapping, pnum);
1092 if (page != NULL) {
1093 ClearPageUptodate(page);
1094 page_cache_release(page);
1095 }
1096
1097 /* Set bitmap page as not up to date */
1098 block++;
1099 pnum = block / blocks_per_page;
1100 page = find_get_page(inode->i_mapping, pnum);
1101 if (page != NULL) {
1102 ClearPageUptodate(page);
1103 page_cache_release(page);
1104 }
1105
1106 /* Get the info on the last group */
1107 grp = ext4_get_group_info(sb, group);
1108
1109 /* Update free blocks in group info */
1110 ext4_mb_update_group_info(grp, add);
1111 }
1112
1063 if (test_opt(sb, DEBUG)) 1113 if (test_opt(sb, DEBUG))
1064 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", 1114 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
1065 ext4_blocks_count(es)); 1115 ext4_blocks_count(es));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 02bf24343979..1cb371dcd609 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -506,6 +506,7 @@ static void ext4_put_super (struct super_block * sb)
506 ext4_ext_release(sb); 506 ext4_ext_release(sb);
507 ext4_xattr_put_super(sb); 507 ext4_xattr_put_super(sb);
508 jbd2_journal_destroy(sbi->s_journal); 508 jbd2_journal_destroy(sbi->s_journal);
509 sbi->s_journal = NULL;
509 if (!(sb->s_flags & MS_RDONLY)) { 510 if (!(sb->s_flags & MS_RDONLY)) {
510 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 511 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
511 es->s_state = cpu_to_le16(sbi->s_mount_state); 512 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -517,6 +518,7 @@ static void ext4_put_super (struct super_block * sb)
517 for (i = 0; i < sbi->s_gdb_count; i++) 518 for (i = 0; i < sbi->s_gdb_count; i++)
518 brelse(sbi->s_group_desc[i]); 519 brelse(sbi->s_group_desc[i]);
519 kfree(sbi->s_group_desc); 520 kfree(sbi->s_group_desc);
521 kfree(sbi->s_flex_groups);
520 percpu_counter_destroy(&sbi->s_freeblocks_counter); 522 percpu_counter_destroy(&sbi->s_freeblocks_counter);
521 percpu_counter_destroy(&sbi->s_freeinodes_counter); 523 percpu_counter_destroy(&sbi->s_freeinodes_counter);
522 percpu_counter_destroy(&sbi->s_dirs_counter); 524 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -571,6 +573,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
571 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 573 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
572 INIT_LIST_HEAD(&ei->i_prealloc_list); 574 INIT_LIST_HEAD(&ei->i_prealloc_list);
573 spin_lock_init(&ei->i_prealloc_lock); 575 spin_lock_init(&ei->i_prealloc_lock);
576 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
577 ei->i_reserved_data_blocks = 0;
578 ei->i_reserved_meta_blocks = 0;
579 ei->i_allocated_meta_blocks = 0;
580 ei->i_delalloc_reserved_flag = 0;
581 spin_lock_init(&(ei->i_block_reservation_lock));
574 return &ei->vfs_inode; 582 return &ei->vfs_inode;
575} 583}
576 584
@@ -635,6 +643,8 @@ static void ext4_clear_inode(struct inode *inode)
635 EXT4_I(inode)->i_block_alloc_info = NULL; 643 EXT4_I(inode)->i_block_alloc_info = NULL;
636 if (unlikely(rsv)) 644 if (unlikely(rsv))
637 kfree(rsv); 645 kfree(rsv);
646 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
647 &EXT4_I(inode)->jinode);
638} 648}
639 649
640static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) 650static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
@@ -671,7 +681,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
671 unsigned long def_mount_opts; 681 unsigned long def_mount_opts;
672 struct super_block *sb = vfs->mnt_sb; 682 struct super_block *sb = vfs->mnt_sb;
673 struct ext4_sb_info *sbi = EXT4_SB(sb); 683 struct ext4_sb_info *sbi = EXT4_SB(sb);
674 journal_t *journal = sbi->s_journal;
675 struct ext4_super_block *es = sbi->s_es; 684 struct ext4_super_block *es = sbi->s_es;
676 685
677 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 686 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -747,6 +756,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
747 seq_puts(seq, ",nomballoc"); 756 seq_puts(seq, ",nomballoc");
748 if (test_opt(sb, I_VERSION)) 757 if (test_opt(sb, I_VERSION))
749 seq_puts(seq, ",i_version"); 758 seq_puts(seq, ",i_version");
759 if (!test_opt(sb, DELALLOC))
760 seq_puts(seq, ",nodelalloc");
761
750 762
751 if (sbi->s_stripe) 763 if (sbi->s_stripe)
752 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 764 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
@@ -894,7 +906,7 @@ enum {
894 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 906 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
895 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 907 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
896 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, 908 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
897 Opt_mballoc, Opt_nomballoc, Opt_stripe, 909 Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
898}; 910};
899 911
900static match_table_t tokens = { 912static match_table_t tokens = {
@@ -953,6 +965,8 @@ static match_table_t tokens = {
953 {Opt_nomballoc, "nomballoc"}, 965 {Opt_nomballoc, "nomballoc"},
954 {Opt_stripe, "stripe=%u"}, 966 {Opt_stripe, "stripe=%u"},
955 {Opt_resize, "resize"}, 967 {Opt_resize, "resize"},
968 {Opt_delalloc, "delalloc"},
969 {Opt_nodelalloc, "nodelalloc"},
956 {Opt_err, NULL}, 970 {Opt_err, NULL},
957}; 971};
958 972
@@ -990,6 +1004,7 @@ static int parse_options (char *options, struct super_block *sb,
990 int qtype, qfmt; 1004 int qtype, qfmt;
991 char *qname; 1005 char *qname;
992#endif 1006#endif
1007 ext4_fsblk_t last_block;
993 1008
994 if (!options) 1009 if (!options)
995 return 1; 1010 return 1;
@@ -1309,15 +1324,39 @@ set_qf_format:
1309 clear_opt(sbi->s_mount_opt, NOBH); 1324 clear_opt(sbi->s_mount_opt, NOBH);
1310 break; 1325 break;
1311 case Opt_extents: 1326 case Opt_extents:
1327 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
1328 EXT4_FEATURE_INCOMPAT_EXTENTS)) {
1329 ext4_warning(sb, __func__,
1330 "extents feature not enabled "
1331 "on this filesystem, use tune2fs\n");
1332 return 0;
1333 }
1312 set_opt (sbi->s_mount_opt, EXTENTS); 1334 set_opt (sbi->s_mount_opt, EXTENTS);
1313 break; 1335 break;
1314 case Opt_noextents: 1336 case Opt_noextents:
1337 /*
1338 * When e2fsprogs support resizing an already existing
1339 * ext3 file system to greater than 2**32 we need to
1340 * add support to block allocator to handle growing
1341 * already existing block mapped inode so that blocks
1342 * allocated for them fall within 2**32
1343 */
1344 last_block = ext4_blocks_count(sbi->s_es) - 1;
1345 if (last_block > 0xffffffffULL) {
1346 printk(KERN_ERR "EXT4-fs: Filesystem too "
1347 "large to mount with "
1348 "-o noextents options\n");
1349 return 0;
1350 }
1315 clear_opt (sbi->s_mount_opt, EXTENTS); 1351 clear_opt (sbi->s_mount_opt, EXTENTS);
1316 break; 1352 break;
1317 case Opt_i_version: 1353 case Opt_i_version:
1318 set_opt(sbi->s_mount_opt, I_VERSION); 1354 set_opt(sbi->s_mount_opt, I_VERSION);
1319 sb->s_flags |= MS_I_VERSION; 1355 sb->s_flags |= MS_I_VERSION;
1320 break; 1356 break;
1357 case Opt_nodelalloc:
1358 clear_opt(sbi->s_mount_opt, DELALLOC);
1359 break;
1321 case Opt_mballoc: 1360 case Opt_mballoc:
1322 set_opt(sbi->s_mount_opt, MBALLOC); 1361 set_opt(sbi->s_mount_opt, MBALLOC);
1323 break; 1362 break;
@@ -1331,6 +1370,9 @@ set_qf_format:
1331 return 0; 1370 return 0;
1332 sbi->s_stripe = option; 1371 sbi->s_stripe = option;
1333 break; 1372 break;
1373 case Opt_delalloc:
1374 set_opt(sbi->s_mount_opt, DELALLOC);
1375 break;
1334 default: 1376 default:
1335 printk (KERN_ERR 1377 printk (KERN_ERR
1336 "EXT4-fs: Unrecognized mount option \"%s\" " 1378 "EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1443,6 +1485,54 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1443 return res; 1485 return res;
1444} 1486}
1445 1487
1488static int ext4_fill_flex_info(struct super_block *sb)
1489{
1490 struct ext4_sb_info *sbi = EXT4_SB(sb);
1491 struct ext4_group_desc *gdp = NULL;
1492 struct buffer_head *bh;
1493 ext4_group_t flex_group_count;
1494 ext4_group_t flex_group;
1495 int groups_per_flex = 0;
1496 __u64 block_bitmap = 0;
1497 int i;
1498
1499 if (!sbi->s_es->s_log_groups_per_flex) {
1500 sbi->s_log_groups_per_flex = 0;
1501 return 1;
1502 }
1503
1504 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1505 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1506
1507 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
1508 groups_per_flex;
1509 sbi->s_flex_groups = kmalloc(flex_group_count *
1510 sizeof(struct flex_groups), GFP_KERNEL);
1511 if (sbi->s_flex_groups == NULL) {
1512 printk(KERN_ERR "EXT4-fs: not enough memory\n");
1513 goto failed;
1514 }
1515 memset(sbi->s_flex_groups, 0, flex_group_count *
1516 sizeof(struct flex_groups));
1517
1518 gdp = ext4_get_group_desc(sb, 1, &bh);
1519 block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
1520
1521 for (i = 0; i < sbi->s_groups_count; i++) {
1522 gdp = ext4_get_group_desc(sb, i, &bh);
1523
1524 flex_group = ext4_flex_group(sbi, i);
1525 sbi->s_flex_groups[flex_group].free_inodes +=
1526 le16_to_cpu(gdp->bg_free_inodes_count);
1527 sbi->s_flex_groups[flex_group].free_blocks +=
1528 le16_to_cpu(gdp->bg_free_blocks_count);
1529 }
1530
1531 return 1;
1532failed:
1533 return 0;
1534}
1535
1446__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1536__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1447 struct ext4_group_desc *gdp) 1537 struct ext4_group_desc *gdp)
1448{ 1538{
@@ -1810,8 +1900,8 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1810} 1900}
1811 1901
1812static int ext4_fill_super (struct super_block *sb, void *data, int silent) 1902static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1813 __releases(kernel_sem) 1903 __releases(kernel_lock)
1814 __acquires(kernel_sem) 1904 __acquires(kernel_lock)
1815 1905
1816{ 1906{
1817 struct buffer_head * bh; 1907 struct buffer_head * bh;
@@ -1851,11 +1941,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1851 goto out_fail; 1941 goto out_fail;
1852 } 1942 }
1853 1943
1854 if (!sb_set_blocksize(sb, blocksize)) {
1855 printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize);
1856 goto out_fail;
1857 }
1858
1859 /* 1944 /*
1860 * The ext4 superblock will not be buffer aligned for other than 1kB 1945 * The ext4 superblock will not be buffer aligned for other than 1kB
1861 * block sizes. We need to calculate the offset from buffer start. 1946 * block sizes. We need to calculate the offset from buffer start.
@@ -1919,15 +2004,28 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1919 2004
1920 /* 2005 /*
1921 * turn on extents feature by default in ext4 filesystem 2006 * turn on extents feature by default in ext4 filesystem
1922 * User -o noextents to turn it off 2007 * only if feature flag already set by mkfs or tune2fs.
2008 * Use -o noextents to turn it off
1923 */ 2009 */
1924 set_opt(sbi->s_mount_opt, EXTENTS); 2010 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
2011 set_opt(sbi->s_mount_opt, EXTENTS);
2012 else
2013 ext4_warning(sb, __func__,
2014 "extents feature not enabled on this filesystem, "
2015 "use tune2fs.\n");
1925 /* 2016 /*
1926 * turn on mballoc feature by default in ext4 filesystem 2017 * turn on mballoc code by default in ext4 filesystem
1927 * User -o nomballoc to turn it off 2018 * Use -o nomballoc to turn it off
1928 */ 2019 */
1929 set_opt(sbi->s_mount_opt, MBALLOC); 2020 set_opt(sbi->s_mount_opt, MBALLOC);
1930 2021
2022 /*
2023 * enable delayed allocation by default
2024 * Use -o nodelalloc to turn it off
2025 */
2026 set_opt(sbi->s_mount_opt, DELALLOC);
2027
2028
1931 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 2029 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
1932 NULL, 0)) 2030 NULL, 0))
1933 goto failed_mount; 2031 goto failed_mount;
@@ -2138,6 +2236,14 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2138 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2236 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2139 goto failed_mount2; 2237 goto failed_mount2;
2140 } 2238 }
2239 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2240 if (!ext4_fill_flex_info(sb)) {
2241 printk(KERN_ERR
2242 "EXT4-fs: unable to initialize "
2243 "flex_bg meta info!\n");
2244 goto failed_mount2;
2245 }
2246
2141 sbi->s_gdb_count = db_count; 2247 sbi->s_gdb_count = db_count;
2142 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 2248 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
2143 spin_lock_init(&sbi->s_next_gen_lock); 2249 spin_lock_init(&sbi->s_next_gen_lock);
@@ -2358,6 +2464,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2358 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 2464 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
2359 "writeback"); 2465 "writeback");
2360 2466
2467 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2468 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
2469 "requested data journaling mode\n");
2470 clear_opt(sbi->s_mount_opt, DELALLOC);
2471 } else if (test_opt(sb, DELALLOC))
2472 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2473
2361 ext4_ext_init(sb); 2474 ext4_ext_init(sb);
2362 ext4_mb_init(sb, needs_recovery); 2475 ext4_mb_init(sb, needs_recovery);
2363 2476
@@ -2372,6 +2485,7 @@ cantfind_ext4:
2372 2485
2373failed_mount4: 2486failed_mount4:
2374 jbd2_journal_destroy(sbi->s_journal); 2487 jbd2_journal_destroy(sbi->s_journal);
2488 sbi->s_journal = NULL;
2375failed_mount3: 2489failed_mount3:
2376 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2490 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2377 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2491 percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -3325,7 +3439,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3325 err = ext4_journal_dirty_metadata(handle, bh); 3439 err = ext4_journal_dirty_metadata(handle, bh);
3326 else { 3440 else {
3327 /* Always do at least ordered writes for quotas */ 3441 /* Always do at least ordered writes for quotas */
3328 err = ext4_journal_dirty_data(handle, bh); 3442 err = ext4_jbd2_file_inode(handle, inode);
3329 mark_buffer_dirty(bh); 3443 mark_buffer_dirty(bh);
3330 } 3444 }
3331 brelse(bh); 3445 brelse(bh);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index ff08633f398e..93c5fdcdad2e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -810,7 +810,7 @@ inserted:
810 /* We need to allocate a new block */ 810 /* We need to allocate a new block */
811 ext4_fsblk_t goal = ext4_group_first_block_no(sb, 811 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
812 EXT4_I(inode)->i_block_group); 812 EXT4_I(inode)->i_block_group);
813 ext4_fsblk_t block = ext4_new_block(handle, inode, 813 ext4_fsblk_t block = ext4_new_meta_block(handle, inode,
814 goal, &error); 814 goal, &error);
815 if (error) 815 if (error)
816 goto cleanup; 816 goto cleanup;
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index fff33382cadc..ac1a52cf2a37 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -13,13 +13,11 @@
13#include "ext4.h" 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted."
17
18static size_t 16static size_t
19ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
20 const char *name, size_t name_len) 18 const char *name, size_t name_len)
21{ 19{
22 const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
23 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
24 22
25 if (!capable(CAP_SYS_ADMIN)) 23 if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 67be723fcc4e..d91aa61b42aa 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -12,13 +12,11 @@
12#include "ext4.h" 12#include "ext4.h"
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user."
16
17static size_t 15static size_t
18ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len) 17 const char *name, size_t name_len)
20{ 18{
21 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
22 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
23 21
24 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 7f7947e3dfbb..ab2f57e3fb87 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -14,23 +14,11 @@ config GFS2_FS
14 GFS is perfect consistency -- changes made to the filesystem on one 14 GFS is perfect consistency -- changes made to the filesystem on one
15 machine show up immediately on all other machines in the cluster. 15 machine show up immediately on all other machines in the cluster.
16 16
17 To use the GFS2 filesystem, you will need to enable one or more of 17 To use the GFS2 filesystem in a cluster, you will need to enable
18 the below locking modules. Documentation and utilities for GFS2 can 18 the locking module below. Documentation and utilities for GFS2 can
19 be found here: http://sources.redhat.com/cluster 19 be found here: http://sources.redhat.com/cluster
20 20
21config GFS2_FS_LOCKING_NOLOCK 21 The "nolock" lock module is now built in to GFS2 by default.
22 tristate "GFS2 \"nolock\" locking module"
23 depends on GFS2_FS
24 help
25 Single node locking module for GFS2.
26
27 Use this module if you want to use GFS2 on a single node without
28 its clustering features. You can still take advantage of the
29 large file support, and upgrade to running a full cluster later on
30 if required.
31
32 If you will only be using GFS2 in cluster mode, you do not need this
33 module.
34 22
35config GFS2_FS_LOCKING_DLM 23config GFS2_FS_LOCKING_DLM
36 tristate "GFS2 DLM locking module" 24 tristate "GFS2 DLM locking module"
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index e2350df02a07..ec65851ec80a 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -5,6 +5,5 @@ gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
5 ops_fstype.o ops_inode.o ops_super.o quota.o \ 5 ops_fstype.o ops_inode.o ops_super.o quota.o \
6 recovery.o rgrp.o super.o sys.o trans.o util.o 6 recovery.o rgrp.o super.o sys.o trans.o util.o
7 7
8obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
9obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/ 8obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
10 9
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h
index 3bb11c0f8b56..ef606e3a5cf4 100644
--- a/fs/gfs2/gfs2.h
+++ b/fs/gfs2/gfs2.h
@@ -16,11 +16,6 @@ enum {
16}; 16};
17 17
18enum { 18enum {
19 NO_WAIT = 0,
20 WAIT = 1,
21};
22
23enum {
24 NO_FORCE = 0, 19 NO_FORCE = 0,
25 FORCE = 1, 20 FORCE = 1,
26}; 21};
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d636b3e80f5d..13391e546616 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -45,21 +45,19 @@ struct gfs2_gl_hash_bucket {
45 struct hlist_head hb_list; 45 struct hlist_head hb_list;
46}; 46};
47 47
48struct glock_iter { 48struct gfs2_glock_iter {
49 int hash; /* hash bucket index */ 49 int hash; /* hash bucket index */
50 struct gfs2_sbd *sdp; /* incore superblock */ 50 struct gfs2_sbd *sdp; /* incore superblock */
51 struct gfs2_glock *gl; /* current glock struct */ 51 struct gfs2_glock *gl; /* current glock struct */
52 struct seq_file *seq; /* sequence file for debugfs */ 52 char string[512]; /* scratch space */
53 char string[512]; /* scratch space */
54}; 53};
55 54
56typedef void (*glock_examiner) (struct gfs2_glock * gl); 55typedef void (*glock_examiner) (struct gfs2_glock * gl);
57 56
58static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); 57static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
59static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); 58static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
60static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); 59#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
61static void gfs2_glock_drop_th(struct gfs2_glock *gl); 60static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
62static void run_queue(struct gfs2_glock *gl);
63 61
64static DECLARE_RWSEM(gfs2_umount_flush_sem); 62static DECLARE_RWSEM(gfs2_umount_flush_sem);
65static struct dentry *gfs2_root; 63static struct dentry *gfs2_root;
@@ -123,33 +121,6 @@ static inline rwlock_t *gl_lock_addr(unsigned int x)
123#endif 121#endif
124 122
125/** 123/**
126 * relaxed_state_ok - is a requested lock compatible with the current lock mode?
127 * @actual: the current state of the lock
128 * @requested: the lock state that was requested by the caller
129 * @flags: the modifier flags passed in by the caller
130 *
131 * Returns: 1 if the locks are compatible, 0 otherwise
132 */
133
134static inline int relaxed_state_ok(unsigned int actual, unsigned requested,
135 int flags)
136{
137 if (actual == requested)
138 return 1;
139
140 if (flags & GL_EXACT)
141 return 0;
142
143 if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
144 return 1;
145
146 if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
147 return 1;
148
149 return 0;
150}
151
152/**
153 * gl_hash() - Turn glock number into hash bucket number 124 * gl_hash() - Turn glock number into hash bucket number
154 * @lock: The glock number 125 * @lock: The glock number
155 * 126 *
@@ -182,7 +153,7 @@ static void glock_free(struct gfs2_glock *gl)
182 struct gfs2_sbd *sdp = gl->gl_sbd; 153 struct gfs2_sbd *sdp = gl->gl_sbd;
183 struct inode *aspace = gl->gl_aspace; 154 struct inode *aspace = gl->gl_aspace;
184 155
185 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 156 if (sdp->sd_lockstruct.ls_ops->lm_put_lock)
186 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); 157 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock);
187 158
188 if (aspace) 159 if (aspace)
@@ -211,17 +182,14 @@ static void gfs2_glock_hold(struct gfs2_glock *gl)
211int gfs2_glock_put(struct gfs2_glock *gl) 182int gfs2_glock_put(struct gfs2_glock *gl)
212{ 183{
213 int rv = 0; 184 int rv = 0;
214 struct gfs2_sbd *sdp = gl->gl_sbd;
215 185
216 write_lock(gl_lock_addr(gl->gl_hash)); 186 write_lock(gl_lock_addr(gl->gl_hash));
217 if (atomic_dec_and_test(&gl->gl_ref)) { 187 if (atomic_dec_and_test(&gl->gl_ref)) {
218 hlist_del(&gl->gl_list); 188 hlist_del(&gl->gl_list);
219 write_unlock(gl_lock_addr(gl->gl_hash)); 189 write_unlock(gl_lock_addr(gl->gl_hash));
220 gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); 190 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED);
221 gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); 191 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_reclaim));
222 gfs2_assert(sdp, list_empty(&gl->gl_holders)); 192 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
223 gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
224 gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
225 glock_free(gl); 193 glock_free(gl);
226 rv = 1; 194 rv = 1;
227 goto out; 195 goto out;
@@ -281,22 +249,401 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
281 return gl; 249 return gl;
282} 250}
283 251
252/**
253 * may_grant - check if its ok to grant a new lock
254 * @gl: The glock
255 * @gh: The lock request which we wish to grant
256 *
257 * Returns: true if its ok to grant the lock
258 */
259
260static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
261{
262 const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list);
263 if ((gh->gh_state == LM_ST_EXCLUSIVE ||
264 gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
265 return 0;
266 if (gl->gl_state == gh->gh_state)
267 return 1;
268 if (gh->gh_flags & GL_EXACT)
269 return 0;
270 if (gl->gl_state == LM_ST_EXCLUSIVE) {
271 if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
272 return 1;
273 if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
274 return 1;
275 }
276 if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
277 return 1;
278 return 0;
279}
280
281static void gfs2_holder_wake(struct gfs2_holder *gh)
282{
283 clear_bit(HIF_WAIT, &gh->gh_iflags);
284 smp_mb__after_clear_bit();
285 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
286}
287
288/**
289 * do_promote - promote as many requests as possible on the current queue
290 * @gl: The glock
291 *
292 * Returns: true if there is a blocked holder at the head of the list
293 */
294
295static int do_promote(struct gfs2_glock *gl)
296{
297 const struct gfs2_glock_operations *glops = gl->gl_ops;
298 struct gfs2_holder *gh, *tmp;
299 int ret;
300
301restart:
302 list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
303 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
304 continue;
305 if (may_grant(gl, gh)) {
306 if (gh->gh_list.prev == &gl->gl_holders &&
307 glops->go_lock) {
308 spin_unlock(&gl->gl_spin);
309 /* FIXME: eliminate this eventually */
310 ret = glops->go_lock(gh);
311 spin_lock(&gl->gl_spin);
312 if (ret) {
313 gh->gh_error = ret;
314 list_del_init(&gh->gh_list);
315 gfs2_holder_wake(gh);
316 goto restart;
317 }
318 set_bit(HIF_HOLDER, &gh->gh_iflags);
319 gfs2_holder_wake(gh);
320 goto restart;
321 }
322 set_bit(HIF_HOLDER, &gh->gh_iflags);
323 gfs2_holder_wake(gh);
324 continue;
325 }
326 if (gh->gh_list.prev == &gl->gl_holders)
327 return 1;
328 break;
329 }
330 return 0;
331}
332
333/**
334 * do_error - Something unexpected has happened during a lock request
335 *
336 */
337
338static inline void do_error(struct gfs2_glock *gl, const int ret)
339{
340 struct gfs2_holder *gh, *tmp;
341
342 list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
343 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
344 continue;
345 if (ret & LM_OUT_ERROR)
346 gh->gh_error = -EIO;
347 else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
348 gh->gh_error = GLR_TRYFAILED;
349 else
350 continue;
351 list_del_init(&gh->gh_list);
352 gfs2_holder_wake(gh);
353 }
354}
355
356/**
357 * find_first_waiter - find the first gh that's waiting for the glock
358 * @gl: the glock
359 */
360
361static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
362{
363 struct gfs2_holder *gh;
364
365 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
366 if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
367 return gh;
368 }
369 return NULL;
370}
371
372/**
373 * state_change - record that the glock is now in a different state
374 * @gl: the glock
375 * @new_state the new state
376 *
377 */
378
379static void state_change(struct gfs2_glock *gl, unsigned int new_state)
380{
381 int held1, held2;
382
383 held1 = (gl->gl_state != LM_ST_UNLOCKED);
384 held2 = (new_state != LM_ST_UNLOCKED);
385
386 if (held1 != held2) {
387 if (held2)
388 gfs2_glock_hold(gl);
389 else
390 gfs2_glock_put(gl);
391 }
392
393 gl->gl_state = new_state;
394 gl->gl_tchange = jiffies;
395}
396
397static void gfs2_demote_wake(struct gfs2_glock *gl)
398{
399 gl->gl_demote_state = LM_ST_EXCLUSIVE;
400 clear_bit(GLF_DEMOTE, &gl->gl_flags);
401 smp_mb__after_clear_bit();
402 wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
403}
404
405/**
406 * finish_xmote - The DLM has replied to one of our lock requests
407 * @gl: The glock
408 * @ret: The status from the DLM
409 *
410 */
411
412static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
413{
414 const struct gfs2_glock_operations *glops = gl->gl_ops;
415 struct gfs2_holder *gh;
416 unsigned state = ret & LM_OUT_ST_MASK;
417
418 spin_lock(&gl->gl_spin);
419 state_change(gl, state);
420 gh = find_first_waiter(gl);
421
422 /* Demote to UN request arrived during demote to SH or DF */
423 if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
424 state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
425 gl->gl_target = LM_ST_UNLOCKED;
426
427 /* Check for state != intended state */
428 if (unlikely(state != gl->gl_target)) {
429 if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
430 /* move to back of queue and try next entry */
431 if (ret & LM_OUT_CANCELED) {
432 if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
433 list_move_tail(&gh->gh_list, &gl->gl_holders);
434 gh = find_first_waiter(gl);
435 gl->gl_target = gh->gh_state;
436 goto retry;
437 }
438 /* Some error or failed "try lock" - report it */
439 if ((ret & LM_OUT_ERROR) ||
440 (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
441 gl->gl_target = gl->gl_state;
442 do_error(gl, ret);
443 goto out;
444 }
445 }
446 switch(state) {
447 /* Unlocked due to conversion deadlock, try again */
448 case LM_ST_UNLOCKED:
449retry:
450 do_xmote(gl, gh, gl->gl_target);
451 break;
452 /* Conversion fails, unlock and try again */
453 case LM_ST_SHARED:
454 case LM_ST_DEFERRED:
455 do_xmote(gl, gh, LM_ST_UNLOCKED);
456 break;
457 default: /* Everything else */
458 printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state);
459 GLOCK_BUG_ON(gl, 1);
460 }
461 spin_unlock(&gl->gl_spin);
462 gfs2_glock_put(gl);
463 return;
464 }
465
466 /* Fast path - we got what we asked for */
467 if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
468 gfs2_demote_wake(gl);
469 if (state != LM_ST_UNLOCKED) {
470 if (glops->go_xmote_bh) {
471 int rv;
472 spin_unlock(&gl->gl_spin);
473 rv = glops->go_xmote_bh(gl, gh);
474 if (rv == -EAGAIN)
475 return;
476 spin_lock(&gl->gl_spin);
477 if (rv) {
478 do_error(gl, rv);
479 goto out;
480 }
481 }
482 do_promote(gl);
483 }
484out:
485 clear_bit(GLF_LOCK, &gl->gl_flags);
486 spin_unlock(&gl->gl_spin);
487 gfs2_glock_put(gl);
488}
489
490static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
491 unsigned int cur_state, unsigned int req_state,
492 unsigned int flags)
493{
494 int ret = LM_OUT_ERROR;
495
496 if (!sdp->sd_lockstruct.ls_ops->lm_lock)
497 return req_state == LM_ST_UNLOCKED ? 0 : req_state;
498
499 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
500 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
501 req_state, flags);
502 return ret;
503}
504
505/**
506 * do_xmote - Calls the DLM to change the state of a lock
507 * @gl: The lock state
508 * @gh: The holder (only for promotes)
509 * @target: The target lock state
510 *
511 */
512
513static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
514{
515 const struct gfs2_glock_operations *glops = gl->gl_ops;
516 struct gfs2_sbd *sdp = gl->gl_sbd;
517 unsigned int lck_flags = gh ? gh->gh_flags : 0;
518 int ret;
519
520 lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
521 LM_FLAG_PRIORITY);
522 BUG_ON(gl->gl_state == target);
523 BUG_ON(gl->gl_state == gl->gl_target);
524 if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
525 glops->go_inval) {
526 set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
527 do_error(gl, 0); /* Fail queued try locks */
528 }
529 spin_unlock(&gl->gl_spin);
530 if (glops->go_xmote_th)
531 glops->go_xmote_th(gl);
532 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
533 glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
534 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
535
536 gfs2_glock_hold(gl);
537 if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
538 gl->gl_state == LM_ST_DEFERRED) &&
539 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
540 lck_flags |= LM_FLAG_TRY_1CB;
541 ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, target, lck_flags);
542
543 if (!(ret & LM_OUT_ASYNC)) {
544 finish_xmote(gl, ret);
545 gfs2_glock_hold(gl);
546 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
547 gfs2_glock_put(gl);
548 } else {
549 GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
550 }
551 spin_lock(&gl->gl_spin);
552}
553
554/**
555 * find_first_holder - find the first "holder" gh
556 * @gl: the glock
557 */
558
559static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
560{
561 struct gfs2_holder *gh;
562
563 if (!list_empty(&gl->gl_holders)) {
564 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
565 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
566 return gh;
567 }
568 return NULL;
569}
570
571/**
572 * run_queue - do all outstanding tasks related to a glock
573 * @gl: The glock in question
574 * @nonblock: True if we must not block in run_queue
575 *
576 */
577
578static void run_queue(struct gfs2_glock *gl, const int nonblock)
579{
580 struct gfs2_holder *gh = NULL;
581
582 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
583 return;
584
585 GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
586
587 if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
588 gl->gl_demote_state != gl->gl_state) {
589 if (find_first_holder(gl))
590 goto out;
591 if (nonblock)
592 goto out_sched;
593 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
594 GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
595 gl->gl_target = gl->gl_demote_state;
596 } else {
597 if (test_bit(GLF_DEMOTE, &gl->gl_flags))
598 gfs2_demote_wake(gl);
599 if (do_promote(gl) == 0)
600 goto out;
601 gh = find_first_waiter(gl);
602 gl->gl_target = gh->gh_state;
603 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
604 do_error(gl, 0); /* Fail queued try locks */
605 }
606 do_xmote(gl, gh, gl->gl_target);
607 return;
608
609out_sched:
610 gfs2_glock_hold(gl);
611 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
612 gfs2_glock_put(gl);
613out:
614 clear_bit(GLF_LOCK, &gl->gl_flags);
615}
616
284static void glock_work_func(struct work_struct *work) 617static void glock_work_func(struct work_struct *work)
285{ 618{
619 unsigned long delay = 0;
286 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); 620 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
287 621
622 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags))
623 finish_xmote(gl, gl->gl_reply);
288 spin_lock(&gl->gl_spin); 624 spin_lock(&gl->gl_spin);
289 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) 625 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
290 set_bit(GLF_DEMOTE, &gl->gl_flags); 626 gl->gl_state != LM_ST_UNLOCKED &&
291 run_queue(gl); 627 gl->gl_demote_state != LM_ST_EXCLUSIVE) {
628 unsigned long holdtime, now = jiffies;
629 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
630 if (time_before(now, holdtime))
631 delay = holdtime - now;
632 set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags);
633 }
634 run_queue(gl, 0);
292 spin_unlock(&gl->gl_spin); 635 spin_unlock(&gl->gl_spin);
293 gfs2_glock_put(gl); 636 if (!delay ||
637 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
638 gfs2_glock_put(gl);
294} 639}
295 640
296static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, 641static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
297 void **lockp) 642 void **lockp)
298{ 643{
299 int error = -EIO; 644 int error = -EIO;
645 if (!sdp->sd_lockstruct.ls_ops->lm_get_lock)
646 return 0;
300 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 647 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
301 error = sdp->sd_lockstruct.ls_ops->lm_get_lock( 648 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
302 sdp->sd_lockstruct.ls_lockspace, name, lockp); 649 sdp->sd_lockstruct.ls_lockspace, name, lockp);
@@ -342,12 +689,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
342 gl->gl_name = name; 689 gl->gl_name = name;
343 atomic_set(&gl->gl_ref, 1); 690 atomic_set(&gl->gl_ref, 1);
344 gl->gl_state = LM_ST_UNLOCKED; 691 gl->gl_state = LM_ST_UNLOCKED;
692 gl->gl_target = LM_ST_UNLOCKED;
345 gl->gl_demote_state = LM_ST_EXCLUSIVE; 693 gl->gl_demote_state = LM_ST_EXCLUSIVE;
346 gl->gl_hash = hash; 694 gl->gl_hash = hash;
347 gl->gl_owner_pid = NULL;
348 gl->gl_ip = 0;
349 gl->gl_ops = glops; 695 gl->gl_ops = glops;
350 gl->gl_req_gh = NULL;
351 gl->gl_stamp = jiffies; 696 gl->gl_stamp = jiffies;
352 gl->gl_tchange = jiffies; 697 gl->gl_tchange = jiffies;
353 gl->gl_object = NULL; 698 gl->gl_object = NULL;
@@ -447,13 +792,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
447 gh->gh_ip = 0; 792 gh->gh_ip = 0;
448} 793}
449 794
450static void gfs2_holder_wake(struct gfs2_holder *gh)
451{
452 clear_bit(HIF_WAIT, &gh->gh_iflags);
453 smp_mb__after_clear_bit();
454 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
455}
456
457static int just_schedule(void *word) 795static int just_schedule(void *word)
458{ 796{
459 schedule(); 797 schedule();
@@ -466,14 +804,6 @@ static void wait_on_holder(struct gfs2_holder *gh)
466 wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); 804 wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
467} 805}
468 806
469static void gfs2_demote_wake(struct gfs2_glock *gl)
470{
471 gl->gl_demote_state = LM_ST_EXCLUSIVE;
472 clear_bit(GLF_DEMOTE, &gl->gl_flags);
473 smp_mb__after_clear_bit();
474 wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
475}
476
477static void wait_on_demote(struct gfs2_glock *gl) 807static void wait_on_demote(struct gfs2_glock *gl)
478{ 808{
479 might_sleep(); 809 might_sleep();
@@ -481,217 +811,6 @@ static void wait_on_demote(struct gfs2_glock *gl)
481} 811}
482 812
483/** 813/**
484 * rq_mutex - process a mutex request in the queue
485 * @gh: the glock holder
486 *
487 * Returns: 1 if the queue is blocked
488 */
489
490static int rq_mutex(struct gfs2_holder *gh)
491{
492 struct gfs2_glock *gl = gh->gh_gl;
493
494 list_del_init(&gh->gh_list);
495 /* gh->gh_error never examined. */
496 set_bit(GLF_LOCK, &gl->gl_flags);
497 clear_bit(HIF_WAIT, &gh->gh_iflags);
498 smp_mb();
499 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
500
501 return 1;
502}
503
504/**
505 * rq_promote - process a promote request in the queue
506 * @gh: the glock holder
507 *
508 * Acquire a new inter-node lock, or change a lock state to more restrictive.
509 *
510 * Returns: 1 if the queue is blocked
511 */
512
513static int rq_promote(struct gfs2_holder *gh)
514{
515 struct gfs2_glock *gl = gh->gh_gl;
516
517 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
518 if (list_empty(&gl->gl_holders)) {
519 gl->gl_req_gh = gh;
520 set_bit(GLF_LOCK, &gl->gl_flags);
521 spin_unlock(&gl->gl_spin);
522 gfs2_glock_xmote_th(gh->gh_gl, gh);
523 spin_lock(&gl->gl_spin);
524 }
525 return 1;
526 }
527
528 if (list_empty(&gl->gl_holders)) {
529 set_bit(HIF_FIRST, &gh->gh_iflags);
530 set_bit(GLF_LOCK, &gl->gl_flags);
531 } else {
532 struct gfs2_holder *next_gh;
533 if (gh->gh_state == LM_ST_EXCLUSIVE)
534 return 1;
535 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
536 gh_list);
537 if (next_gh->gh_state == LM_ST_EXCLUSIVE)
538 return 1;
539 }
540
541 list_move_tail(&gh->gh_list, &gl->gl_holders);
542 gh->gh_error = 0;
543 set_bit(HIF_HOLDER, &gh->gh_iflags);
544
545 gfs2_holder_wake(gh);
546
547 return 0;
548}
549
550/**
551 * rq_demote - process a demote request in the queue
552 * @gh: the glock holder
553 *
554 * Returns: 1 if the queue is blocked
555 */
556
557static int rq_demote(struct gfs2_glock *gl)
558{
559 if (!list_empty(&gl->gl_holders))
560 return 1;
561
562 if (gl->gl_state == gl->gl_demote_state ||
563 gl->gl_state == LM_ST_UNLOCKED) {
564 gfs2_demote_wake(gl);
565 return 0;
566 }
567
568 set_bit(GLF_LOCK, &gl->gl_flags);
569 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
570
571 if (gl->gl_demote_state == LM_ST_UNLOCKED ||
572 gl->gl_state != LM_ST_EXCLUSIVE) {
573 spin_unlock(&gl->gl_spin);
574 gfs2_glock_drop_th(gl);
575 } else {
576 spin_unlock(&gl->gl_spin);
577 gfs2_glock_xmote_th(gl, NULL);
578 }
579
580 spin_lock(&gl->gl_spin);
581 clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
582
583 return 0;
584}
585
586/**
587 * run_queue - process holder structures on a glock
588 * @gl: the glock
589 *
590 */
591static void run_queue(struct gfs2_glock *gl)
592{
593 struct gfs2_holder *gh;
594 int blocked = 1;
595
596 for (;;) {
597 if (test_bit(GLF_LOCK, &gl->gl_flags))
598 break;
599
600 if (!list_empty(&gl->gl_waiters1)) {
601 gh = list_entry(gl->gl_waiters1.next,
602 struct gfs2_holder, gh_list);
603 blocked = rq_mutex(gh);
604 } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
605 blocked = rq_demote(gl);
606 if (test_bit(GLF_WAITERS2, &gl->gl_flags) &&
607 !blocked) {
608 set_bit(GLF_DEMOTE, &gl->gl_flags);
609 gl->gl_demote_state = LM_ST_UNLOCKED;
610 }
611 clear_bit(GLF_WAITERS2, &gl->gl_flags);
612 } else if (!list_empty(&gl->gl_waiters3)) {
613 gh = list_entry(gl->gl_waiters3.next,
614 struct gfs2_holder, gh_list);
615 blocked = rq_promote(gh);
616 } else
617 break;
618
619 if (blocked)
620 break;
621 }
622}
623
624/**
625 * gfs2_glmutex_lock - acquire a local lock on a glock
626 * @gl: the glock
627 *
628 * Gives caller exclusive access to manipulate a glock structure.
629 */
630
631static void gfs2_glmutex_lock(struct gfs2_glock *gl)
632{
633 spin_lock(&gl->gl_spin);
634 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
635 struct gfs2_holder gh;
636
637 gfs2_holder_init(gl, 0, 0, &gh);
638 set_bit(HIF_WAIT, &gh.gh_iflags);
639 list_add_tail(&gh.gh_list, &gl->gl_waiters1);
640 spin_unlock(&gl->gl_spin);
641 wait_on_holder(&gh);
642 gfs2_holder_uninit(&gh);
643 } else {
644 gl->gl_owner_pid = get_pid(task_pid(current));
645 gl->gl_ip = (unsigned long)__builtin_return_address(0);
646 spin_unlock(&gl->gl_spin);
647 }
648}
649
650/**
651 * gfs2_glmutex_trylock - try to acquire a local lock on a glock
652 * @gl: the glock
653 *
654 * Returns: 1 if the glock is acquired
655 */
656
657static int gfs2_glmutex_trylock(struct gfs2_glock *gl)
658{
659 int acquired = 1;
660
661 spin_lock(&gl->gl_spin);
662 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
663 acquired = 0;
664 } else {
665 gl->gl_owner_pid = get_pid(task_pid(current));
666 gl->gl_ip = (unsigned long)__builtin_return_address(0);
667 }
668 spin_unlock(&gl->gl_spin);
669
670 return acquired;
671}
672
673/**
674 * gfs2_glmutex_unlock - release a local lock on a glock
675 * @gl: the glock
676 *
677 */
678
679static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
680{
681 struct pid *pid;
682
683 spin_lock(&gl->gl_spin);
684 clear_bit(GLF_LOCK, &gl->gl_flags);
685 pid = gl->gl_owner_pid;
686 gl->gl_owner_pid = NULL;
687 gl->gl_ip = 0;
688 run_queue(gl);
689 spin_unlock(&gl->gl_spin);
690
691 put_pid(pid);
692}
693
694/**
695 * handle_callback - process a demote request 814 * handle_callback - process a demote request
696 * @gl: the glock 815 * @gl: the glock
697 * @state: the state the caller wants us to change to 816 * @state: the state the caller wants us to change to
@@ -705,398 +824,45 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
705{ 824{
706 int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; 825 int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
707 826
708 spin_lock(&gl->gl_spin);
709 set_bit(bit, &gl->gl_flags); 827 set_bit(bit, &gl->gl_flags);
710 if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { 828 if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
711 gl->gl_demote_state = state; 829 gl->gl_demote_state = state;
712 gl->gl_demote_time = jiffies; 830 gl->gl_demote_time = jiffies;
713 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && 831 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
714 gl->gl_object) { 832 gl->gl_object)
715 gfs2_glock_schedule_for_reclaim(gl); 833 gfs2_glock_schedule_for_reclaim(gl);
716 spin_unlock(&gl->gl_spin);
717 return;
718 }
719 } else if (gl->gl_demote_state != LM_ST_UNLOCKED && 834 } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
720 gl->gl_demote_state != state) { 835 gl->gl_demote_state != state) {
721 if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) 836 gl->gl_demote_state = LM_ST_UNLOCKED;
722 set_bit(GLF_WAITERS2, &gl->gl_flags);
723 else
724 gl->gl_demote_state = LM_ST_UNLOCKED;
725 }
726 spin_unlock(&gl->gl_spin);
727}
728
729/**
730 * state_change - record that the glock is now in a different state
731 * @gl: the glock
732 * @new_state the new state
733 *
734 */
735
736static void state_change(struct gfs2_glock *gl, unsigned int new_state)
737{
738 int held1, held2;
739
740 held1 = (gl->gl_state != LM_ST_UNLOCKED);
741 held2 = (new_state != LM_ST_UNLOCKED);
742
743 if (held1 != held2) {
744 if (held2)
745 gfs2_glock_hold(gl);
746 else
747 gfs2_glock_put(gl);
748 } 837 }
749
750 gl->gl_state = new_state;
751 gl->gl_tchange = jiffies;
752} 838}
753 839
754/** 840/**
755 * drop_bh - Called after a lock module unlock completes 841 * gfs2_glock_wait - wait on a glock acquisition
756 * @gl: the glock
757 * @ret: the return status
758 *
759 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
760 * Doesn't drop the reference on the glock the top half took out
761 *
762 */
763
764static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
765{
766 struct gfs2_sbd *sdp = gl->gl_sbd;
767 struct gfs2_holder *gh = gl->gl_req_gh;
768
769 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
770 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
771 gfs2_assert_warn(sdp, !ret);
772
773 state_change(gl, LM_ST_UNLOCKED);
774
775 if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) {
776 spin_lock(&gl->gl_spin);
777 gh->gh_error = 0;
778 spin_unlock(&gl->gl_spin);
779 gfs2_glock_xmote_th(gl, gl->gl_req_gh);
780 gfs2_glock_put(gl);
781 return;
782 }
783
784 spin_lock(&gl->gl_spin);
785 gfs2_demote_wake(gl);
786 clear_bit(GLF_LOCK, &gl->gl_flags);
787 spin_unlock(&gl->gl_spin);
788 gfs2_glock_put(gl);
789}
790
791/**
792 * xmote_bh - Called after the lock module is done acquiring a lock
793 * @gl: The glock in question
794 * @ret: the int returned from the lock module
795 *
796 */
797
798static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
799{
800 struct gfs2_sbd *sdp = gl->gl_sbd;
801 const struct gfs2_glock_operations *glops = gl->gl_ops;
802 struct gfs2_holder *gh = gl->gl_req_gh;
803 int op_done = 1;
804
805 if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) {
806 drop_bh(gl, ret);
807 return;
808 }
809
810 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
811 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
812 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
813
814 state_change(gl, ret & LM_OUT_ST_MASK);
815
816 /* Deal with each possible exit condition */
817
818 if (!gh) {
819 gl->gl_stamp = jiffies;
820 if (ret & LM_OUT_CANCELED) {
821 op_done = 0;
822 } else {
823 spin_lock(&gl->gl_spin);
824 if (gl->gl_state != gl->gl_demote_state) {
825 spin_unlock(&gl->gl_spin);
826 gfs2_glock_drop_th(gl);
827 gfs2_glock_put(gl);
828 return;
829 }
830 gfs2_demote_wake(gl);
831 spin_unlock(&gl->gl_spin);
832 }
833 } else {
834 spin_lock(&gl->gl_spin);
835 if (ret & LM_OUT_CONV_DEADLK) {
836 gh->gh_error = 0;
837 set_bit(GLF_CONV_DEADLK, &gl->gl_flags);
838 spin_unlock(&gl->gl_spin);
839 gfs2_glock_drop_th(gl);
840 gfs2_glock_put(gl);
841 return;
842 }
843 list_del_init(&gh->gh_list);
844 gh->gh_error = -EIO;
845 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
846 goto out;
847 gh->gh_error = GLR_CANCELED;
848 if (ret & LM_OUT_CANCELED)
849 goto out;
850 if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
851 list_add_tail(&gh->gh_list, &gl->gl_holders);
852 gh->gh_error = 0;
853 set_bit(HIF_HOLDER, &gh->gh_iflags);
854 set_bit(HIF_FIRST, &gh->gh_iflags);
855 op_done = 0;
856 goto out;
857 }
858 gh->gh_error = GLR_TRYFAILED;
859 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
860 goto out;
861 gh->gh_error = -EINVAL;
862 if (gfs2_assert_withdraw(sdp, 0) == -1)
863 fs_err(sdp, "ret = 0x%.8X\n", ret);
864out:
865 spin_unlock(&gl->gl_spin);
866 }
867
868 if (glops->go_xmote_bh)
869 glops->go_xmote_bh(gl);
870
871 if (op_done) {
872 spin_lock(&gl->gl_spin);
873 gl->gl_req_gh = NULL;
874 clear_bit(GLF_LOCK, &gl->gl_flags);
875 spin_unlock(&gl->gl_spin);
876 }
877
878 gfs2_glock_put(gl);
879
880 if (gh)
881 gfs2_holder_wake(gh);
882}
883
884static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
885 unsigned int cur_state, unsigned int req_state,
886 unsigned int flags)
887{
888 int ret = 0;
889 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
890 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
891 req_state, flags);
892 return ret;
893}
894
895/**
896 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
897 * @gl: The glock in question
898 * @state: the requested state
899 * @flags: modifier flags to the lock call
900 *
901 */
902
903static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
904{
905 struct gfs2_sbd *sdp = gl->gl_sbd;
906 int flags = gh ? gh->gh_flags : 0;
907 unsigned state = gh ? gh->gh_state : gl->gl_demote_state;
908 const struct gfs2_glock_operations *glops = gl->gl_ops;
909 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
910 LM_FLAG_NOEXP | LM_FLAG_ANY |
911 LM_FLAG_PRIORITY);
912 unsigned int lck_ret;
913
914 if (glops->go_xmote_th)
915 glops->go_xmote_th(gl);
916 if (state == LM_ST_DEFERRED && glops->go_inval)
917 glops->go_inval(gl, DIO_METADATA);
918
919 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
920 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
921 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
922 gfs2_assert_warn(sdp, state != gl->gl_state);
923
924 gfs2_glock_hold(gl);
925
926 lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags);
927
928 if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR)))
929 return;
930
931 if (lck_ret & LM_OUT_ASYNC)
932 gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
933 else
934 xmote_bh(gl, lck_ret);
935}
936
937static unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
938 unsigned int cur_state)
939{
940 int ret = 0;
941 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
942 ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
943 return ret;
944}
945
946/**
947 * gfs2_glock_drop_th - call into the lock module to unlock a lock
948 * @gl: the glock
949 *
950 */
951
952static void gfs2_glock_drop_th(struct gfs2_glock *gl)
953{
954 struct gfs2_sbd *sdp = gl->gl_sbd;
955 const struct gfs2_glock_operations *glops = gl->gl_ops;
956 unsigned int ret;
957
958 if (glops->go_xmote_th)
959 glops->go_xmote_th(gl);
960 if (glops->go_inval)
961 glops->go_inval(gl, DIO_METADATA);
962
963 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
964 gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
965 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
966
967 gfs2_glock_hold(gl);
968
969 ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);
970
971 if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR)))
972 return;
973
974 if (!ret)
975 drop_bh(gl, ret);
976 else
977 gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
978}
979
980/**
981 * do_cancels - cancel requests for locks stuck waiting on an expire flag
982 * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock
983 *
984 * Don't cancel GL_NOCANCEL requests.
985 */
986
987static void do_cancels(struct gfs2_holder *gh)
988{
989 struct gfs2_glock *gl = gh->gh_gl;
990 struct gfs2_sbd *sdp = gl->gl_sbd;
991
992 spin_lock(&gl->gl_spin);
993
994 while (gl->gl_req_gh != gh &&
995 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
996 !list_empty(&gh->gh_list)) {
997 if (!(gl->gl_req_gh && (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
998 spin_unlock(&gl->gl_spin);
999 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1000 sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock);
1001 msleep(100);
1002 spin_lock(&gl->gl_spin);
1003 } else {
1004 spin_unlock(&gl->gl_spin);
1005 msleep(100);
1006 spin_lock(&gl->gl_spin);
1007 }
1008 }
1009
1010 spin_unlock(&gl->gl_spin);
1011}
1012
1013/**
1014 * glock_wait_internal - wait on a glock acquisition
1015 * @gh: the glock holder 842 * @gh: the glock holder
1016 * 843 *
1017 * Returns: 0 on success 844 * Returns: 0 on success
1018 */ 845 */
1019 846
1020static int glock_wait_internal(struct gfs2_holder *gh) 847int gfs2_glock_wait(struct gfs2_holder *gh)
1021{ 848{
1022 struct gfs2_glock *gl = gh->gh_gl;
1023 struct gfs2_sbd *sdp = gl->gl_sbd;
1024 const struct gfs2_glock_operations *glops = gl->gl_ops;
1025
1026 if (test_bit(HIF_ABORTED, &gh->gh_iflags))
1027 return -EIO;
1028
1029 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1030 spin_lock(&gl->gl_spin);
1031 if (gl->gl_req_gh != gh &&
1032 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1033 !list_empty(&gh->gh_list)) {
1034 list_del_init(&gh->gh_list);
1035 gh->gh_error = GLR_TRYFAILED;
1036 run_queue(gl);
1037 spin_unlock(&gl->gl_spin);
1038 return gh->gh_error;
1039 }
1040 spin_unlock(&gl->gl_spin);
1041 }
1042
1043 if (gh->gh_flags & LM_FLAG_PRIORITY)
1044 do_cancels(gh);
1045
1046 wait_on_holder(gh); 849 wait_on_holder(gh);
1047 if (gh->gh_error)
1048 return gh->gh_error;
1049
1050 gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
1051 gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state,
1052 gh->gh_flags));
1053
1054 if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
1055 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1056
1057 if (glops->go_lock) {
1058 gh->gh_error = glops->go_lock(gh);
1059 if (gh->gh_error) {
1060 spin_lock(&gl->gl_spin);
1061 list_del_init(&gh->gh_list);
1062 spin_unlock(&gl->gl_spin);
1063 }
1064 }
1065
1066 spin_lock(&gl->gl_spin);
1067 gl->gl_req_gh = NULL;
1068 clear_bit(GLF_LOCK, &gl->gl_flags);
1069 run_queue(gl);
1070 spin_unlock(&gl->gl_spin);
1071 }
1072
1073 return gh->gh_error; 850 return gh->gh_error;
1074} 851}
1075 852
1076static inline struct gfs2_holder * 853void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
1077find_holder_by_owner(struct list_head *head, struct pid *pid)
1078{
1079 struct gfs2_holder *gh;
1080
1081 list_for_each_entry(gh, head, gh_list) {
1082 if (gh->gh_owner_pid == pid)
1083 return gh;
1084 }
1085
1086 return NULL;
1087}
1088
1089static void print_dbg(struct glock_iter *gi, const char *fmt, ...)
1090{ 854{
1091 va_list args; 855 va_list args;
1092 856
1093 va_start(args, fmt); 857 va_start(args, fmt);
1094 if (gi) { 858 if (seq) {
859 struct gfs2_glock_iter *gi = seq->private;
1095 vsprintf(gi->string, fmt, args); 860 vsprintf(gi->string, fmt, args);
1096 seq_printf(gi->seq, gi->string); 861 seq_printf(seq, gi->string);
1097 } 862 } else {
1098 else 863 printk(KERN_ERR " ");
1099 vprintk(fmt, args); 864 vprintk(fmt, args);
865 }
1100 va_end(args); 866 va_end(args);
1101} 867}
1102 868
@@ -1104,50 +870,76 @@ static void print_dbg(struct glock_iter *gi, const char *fmt, ...)
1104 * add_to_queue - Add a holder to the wait queue (but look for recursion) 870 * add_to_queue - Add a holder to the wait queue (but look for recursion)
1105 * @gh: the holder structure to add 871 * @gh: the holder structure to add
1106 * 872 *
873 * Eventually we should move the recursive locking trap to a
874 * debugging option or something like that. This is the fast
875 * path and needs to have the minimum number of distractions.
876 *
1107 */ 877 */
1108 878
1109static void add_to_queue(struct gfs2_holder *gh) 879static inline void add_to_queue(struct gfs2_holder *gh)
1110{ 880{
1111 struct gfs2_glock *gl = gh->gh_gl; 881 struct gfs2_glock *gl = gh->gh_gl;
1112 struct gfs2_holder *existing; 882 struct gfs2_sbd *sdp = gl->gl_sbd;
883 struct list_head *insert_pt = NULL;
884 struct gfs2_holder *gh2;
885 int try_lock = 0;
1113 886
1114 BUG_ON(gh->gh_owner_pid == NULL); 887 BUG_ON(gh->gh_owner_pid == NULL);
1115 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 888 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
1116 BUG(); 889 BUG();
1117 890
1118 if (!(gh->gh_flags & GL_FLOCK)) { 891 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1119 existing = find_holder_by_owner(&gl->gl_holders, 892 if (test_bit(GLF_LOCK, &gl->gl_flags))
1120 gh->gh_owner_pid); 893 try_lock = 1;
1121 if (existing) { 894 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
1122 print_symbol(KERN_WARNING "original: %s\n", 895 goto fail;
1123 existing->gh_ip); 896 }
1124 printk(KERN_INFO "pid : %d\n", 897
1125 pid_nr(existing->gh_owner_pid)); 898 list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
1126 printk(KERN_INFO "lock type : %d lock state : %d\n", 899 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
1127 existing->gh_gl->gl_name.ln_type, 900 (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
1128 existing->gh_gl->gl_state); 901 goto trap_recursive;
1129 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 902 if (try_lock &&
1130 printk(KERN_INFO "pid : %d\n", 903 !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) &&
1131 pid_nr(gh->gh_owner_pid)); 904 !may_grant(gl, gh)) {
1132 printk(KERN_INFO "lock type : %d lock state : %d\n", 905fail:
1133 gl->gl_name.ln_type, gl->gl_state); 906 gh->gh_error = GLR_TRYFAILED;
1134 BUG(); 907 gfs2_holder_wake(gh);
1135 } 908 return;
1136
1137 existing = find_holder_by_owner(&gl->gl_waiters3,
1138 gh->gh_owner_pid);
1139 if (existing) {
1140 print_symbol(KERN_WARNING "original: %s\n",
1141 existing->gh_ip);
1142 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1143 BUG();
1144 } 909 }
910 if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
911 continue;
912 if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
913 insert_pt = &gh2->gh_list;
914 }
915 if (likely(insert_pt == NULL)) {
916 list_add_tail(&gh->gh_list, &gl->gl_holders);
917 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
918 goto do_cancel;
919 return;
920 }
921 list_add_tail(&gh->gh_list, insert_pt);
922do_cancel:
923 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
924 if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
925 spin_unlock(&gl->gl_spin);
926 if (sdp->sd_lockstruct.ls_ops->lm_cancel)
927 sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock);
928 spin_lock(&gl->gl_spin);
1145 } 929 }
930 return;
1146 931
1147 if (gh->gh_flags & LM_FLAG_PRIORITY) 932trap_recursive:
1148 list_add(&gh->gh_list, &gl->gl_waiters3); 933 print_symbol(KERN_ERR "original: %s\n", gh2->gh_ip);
1149 else 934 printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid));
1150 list_add_tail(&gh->gh_list, &gl->gl_waiters3); 935 printk(KERN_ERR "lock type: %d req lock state : %d\n",
936 gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
937 print_symbol(KERN_ERR "new: %s\n", gh->gh_ip);
938 printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid));
939 printk(KERN_ERR "lock type: %d req lock state : %d\n",
940 gh->gh_gl->gl_name.ln_type, gh->gh_state);
941 __dump_glock(NULL, gl);
942 BUG();
1151} 943}
1152 944
1153/** 945/**
@@ -1165,24 +957,16 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
1165 struct gfs2_sbd *sdp = gl->gl_sbd; 957 struct gfs2_sbd *sdp = gl->gl_sbd;
1166 int error = 0; 958 int error = 0;
1167 959
1168restart: 960 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1169 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
1170 set_bit(HIF_ABORTED, &gh->gh_iflags);
1171 return -EIO; 961 return -EIO;
1172 }
1173 962
1174 spin_lock(&gl->gl_spin); 963 spin_lock(&gl->gl_spin);
1175 add_to_queue(gh); 964 add_to_queue(gh);
1176 run_queue(gl); 965 run_queue(gl, 1);
1177 spin_unlock(&gl->gl_spin); 966 spin_unlock(&gl->gl_spin);
1178 967
1179 if (!(gh->gh_flags & GL_ASYNC)) { 968 if (!(gh->gh_flags & GL_ASYNC))
1180 error = glock_wait_internal(gh); 969 error = gfs2_glock_wait(gh);
1181 if (error == GLR_CANCELED) {
1182 msleep(100);
1183 goto restart;
1184 }
1185 }
1186 970
1187 return error; 971 return error;
1188} 972}
@@ -1196,48 +980,7 @@ restart:
1196 980
1197int gfs2_glock_poll(struct gfs2_holder *gh) 981int gfs2_glock_poll(struct gfs2_holder *gh)
1198{ 982{
1199 struct gfs2_glock *gl = gh->gh_gl; 983 return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
1200 int ready = 0;
1201
1202 spin_lock(&gl->gl_spin);
1203
1204 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1205 ready = 1;
1206 else if (list_empty(&gh->gh_list)) {
1207 if (gh->gh_error == GLR_CANCELED) {
1208 spin_unlock(&gl->gl_spin);
1209 msleep(100);
1210 if (gfs2_glock_nq(gh))
1211 return 1;
1212 return 0;
1213 } else
1214 ready = 1;
1215 }
1216
1217 spin_unlock(&gl->gl_spin);
1218
1219 return ready;
1220}
1221
1222/**
1223 * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
1224 * @gh: the holder structure
1225 *
1226 * Returns: 0, GLR_TRYFAILED, or errno on failure
1227 */
1228
1229int gfs2_glock_wait(struct gfs2_holder *gh)
1230{
1231 int error;
1232
1233 error = glock_wait_internal(gh);
1234 if (error == GLR_CANCELED) {
1235 msleep(100);
1236 gh->gh_flags &= ~GL_ASYNC;
1237 error = gfs2_glock_nq(gh);
1238 }
1239
1240 return error;
1241} 984}
1242 985
1243/** 986/**
@@ -1251,26 +994,30 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1251 struct gfs2_glock *gl = gh->gh_gl; 994 struct gfs2_glock *gl = gh->gh_gl;
1252 const struct gfs2_glock_operations *glops = gl->gl_ops; 995 const struct gfs2_glock_operations *glops = gl->gl_ops;
1253 unsigned delay = 0; 996 unsigned delay = 0;
997 int fast_path = 0;
1254 998
999 spin_lock(&gl->gl_spin);
1255 if (gh->gh_flags & GL_NOCACHE) 1000 if (gh->gh_flags & GL_NOCACHE)
1256 handle_callback(gl, LM_ST_UNLOCKED, 0, 0); 1001 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1257 1002
1258 gfs2_glmutex_lock(gl);
1259
1260 spin_lock(&gl->gl_spin);
1261 list_del_init(&gh->gh_list); 1003 list_del_init(&gh->gh_list);
1262 1004 if (find_first_holder(gl) == NULL) {
1263 if (list_empty(&gl->gl_holders)) {
1264 if (glops->go_unlock) { 1005 if (glops->go_unlock) {
1006 GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
1265 spin_unlock(&gl->gl_spin); 1007 spin_unlock(&gl->gl_spin);
1266 glops->go_unlock(gh); 1008 glops->go_unlock(gh);
1267 spin_lock(&gl->gl_spin); 1009 spin_lock(&gl->gl_spin);
1010 clear_bit(GLF_LOCK, &gl->gl_flags);
1268 } 1011 }
1269 gl->gl_stamp = jiffies; 1012 gl->gl_stamp = jiffies;
1013 if (list_empty(&gl->gl_holders) &&
1014 !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1015 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1016 fast_path = 1;
1270 } 1017 }
1271
1272 clear_bit(GLF_LOCK, &gl->gl_flags);
1273 spin_unlock(&gl->gl_spin); 1018 spin_unlock(&gl->gl_spin);
1019 if (likely(fast_path))
1020 return;
1274 1021
1275 gfs2_glock_hold(gl); 1022 gfs2_glock_hold(gl);
1276 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1023 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
@@ -1454,6 +1201,8 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1454static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) 1201static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp)
1455{ 1202{
1456 int error = -EIO; 1203 int error = -EIO;
1204 if (!sdp->sd_lockstruct.ls_ops->lm_hold_lvb)
1205 return 0;
1457 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 1206 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1458 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); 1207 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
1459 return error; 1208 return error;
@@ -1469,20 +1218,14 @@ int gfs2_lvb_hold(struct gfs2_glock *gl)
1469{ 1218{
1470 int error; 1219 int error;
1471 1220
1472 gfs2_glmutex_lock(gl);
1473
1474 if (!atomic_read(&gl->gl_lvb_count)) { 1221 if (!atomic_read(&gl->gl_lvb_count)) {
1475 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb); 1222 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
1476 if (error) { 1223 if (error)
1477 gfs2_glmutex_unlock(gl);
1478 return error; 1224 return error;
1479 }
1480 gfs2_glock_hold(gl); 1225 gfs2_glock_hold(gl);
1481 } 1226 }
1482 atomic_inc(&gl->gl_lvb_count); 1227 atomic_inc(&gl->gl_lvb_count);
1483 1228
1484 gfs2_glmutex_unlock(gl);
1485
1486 return 0; 1229 return 0;
1487} 1230}
1488 1231
@@ -1497,17 +1240,13 @@ void gfs2_lvb_unhold(struct gfs2_glock *gl)
1497 struct gfs2_sbd *sdp = gl->gl_sbd; 1240 struct gfs2_sbd *sdp = gl->gl_sbd;
1498 1241
1499 gfs2_glock_hold(gl); 1242 gfs2_glock_hold(gl);
1500 gfs2_glmutex_lock(gl);
1501
1502 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); 1243 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1503 if (atomic_dec_and_test(&gl->gl_lvb_count)) { 1244 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1504 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 1245 if (sdp->sd_lockstruct.ls_ops->lm_unhold_lvb)
1505 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb); 1246 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb);
1506 gl->gl_lvb = NULL; 1247 gl->gl_lvb = NULL;
1507 gfs2_glock_put(gl); 1248 gfs2_glock_put(gl);
1508 } 1249 }
1509
1510 gfs2_glmutex_unlock(gl);
1511 gfs2_glock_put(gl); 1250 gfs2_glock_put(gl);
1512} 1251}
1513 1252
@@ -1527,7 +1266,9 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1527 if (time_before(now, holdtime)) 1266 if (time_before(now, holdtime))
1528 delay = holdtime - now; 1267 delay = holdtime - now;
1529 1268
1269 spin_lock(&gl->gl_spin);
1530 handle_callback(gl, state, 1, delay); 1270 handle_callback(gl, state, 1, delay);
1271 spin_unlock(&gl->gl_spin);
1531 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) 1272 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
1532 gfs2_glock_put(gl); 1273 gfs2_glock_put(gl);
1533} 1274}
@@ -1568,7 +1309,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1568 gl = gfs2_glock_find(sdp, &async->lc_name); 1309 gl = gfs2_glock_find(sdp, &async->lc_name);
1569 if (gfs2_assert_warn(sdp, gl)) 1310 if (gfs2_assert_warn(sdp, gl))
1570 return; 1311 return;
1571 xmote_bh(gl, async->lc_ret); 1312 gl->gl_reply = async->lc_ret;
1313 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1572 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1314 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1573 gfs2_glock_put(gl); 1315 gfs2_glock_put(gl);
1574 up_read(&gfs2_umount_flush_sem); 1316 up_read(&gfs2_umount_flush_sem);
@@ -1581,11 +1323,6 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1581 wake_up_process(sdp->sd_recoverd_process); 1323 wake_up_process(sdp->sd_recoverd_process);
1582 return; 1324 return;
1583 1325
1584 case LM_CB_DROPLOCKS:
1585 gfs2_gl_hash_clear(sdp, NO_WAIT);
1586 gfs2_quota_scan(sdp);
1587 return;
1588
1589 default: 1326 default:
1590 gfs2_assert_warn(sdp, 0); 1327 gfs2_assert_warn(sdp, 0);
1591 return; 1328 return;
@@ -1646,6 +1383,7 @@ void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
1646void gfs2_reclaim_glock(struct gfs2_sbd *sdp) 1383void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1647{ 1384{
1648 struct gfs2_glock *gl; 1385 struct gfs2_glock *gl;
1386 int done_callback = 0;
1649 1387
1650 spin_lock(&sdp->sd_reclaim_lock); 1388 spin_lock(&sdp->sd_reclaim_lock);
1651 if (list_empty(&sdp->sd_reclaim_list)) { 1389 if (list_empty(&sdp->sd_reclaim_list)) {
@@ -1660,14 +1398,16 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1660 atomic_dec(&sdp->sd_reclaim_count); 1398 atomic_dec(&sdp->sd_reclaim_count);
1661 atomic_inc(&sdp->sd_reclaimed); 1399 atomic_inc(&sdp->sd_reclaimed);
1662 1400
1663 if (gfs2_glmutex_trylock(gl)) { 1401 spin_lock(&gl->gl_spin);
1664 if (list_empty(&gl->gl_holders) && 1402 if (find_first_holder(gl) == NULL &&
1665 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1403 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) {
1666 handle_callback(gl, LM_ST_UNLOCKED, 0, 0); 1404 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1667 gfs2_glmutex_unlock(gl); 1405 done_callback = 1;
1668 } 1406 }
1669 1407 spin_unlock(&gl->gl_spin);
1670 gfs2_glock_put(gl); 1408 if (!done_callback ||
1409 queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1410 gfs2_glock_put(gl);
1671} 1411}
1672 1412
1673/** 1413/**
@@ -1724,18 +1464,14 @@ static void scan_glock(struct gfs2_glock *gl)
1724{ 1464{
1725 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) 1465 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
1726 return; 1466 return;
1467 if (test_bit(GLF_LOCK, &gl->gl_flags))
1468 return;
1727 1469
1728 if (gfs2_glmutex_trylock(gl)) { 1470 spin_lock(&gl->gl_spin);
1729 if (list_empty(&gl->gl_holders) && 1471 if (find_first_holder(gl) == NULL &&
1730 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1472 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1731 goto out_schedule; 1473 gfs2_glock_schedule_for_reclaim(gl);
1732 gfs2_glmutex_unlock(gl); 1474 spin_unlock(&gl->gl_spin);
1733 }
1734 return;
1735
1736out_schedule:
1737 gfs2_glmutex_unlock(gl);
1738 gfs2_glock_schedule_for_reclaim(gl);
1739} 1475}
1740 1476
1741/** 1477/**
@@ -1760,12 +1496,13 @@ static void clear_glock(struct gfs2_glock *gl)
1760 spin_unlock(&sdp->sd_reclaim_lock); 1496 spin_unlock(&sdp->sd_reclaim_lock);
1761 } 1497 }
1762 1498
1763 if (gfs2_glmutex_trylock(gl)) { 1499 spin_lock(&gl->gl_spin);
1764 if (list_empty(&gl->gl_holders) && 1500 if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED)
1765 gl->gl_state != LM_ST_UNLOCKED) 1501 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1766 handle_callback(gl, LM_ST_UNLOCKED, 0, 0); 1502 spin_unlock(&gl->gl_spin);
1767 gfs2_glmutex_unlock(gl); 1503 gfs2_glock_hold(gl);
1768 } 1504 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1505 gfs2_glock_put(gl);
1769} 1506}
1770 1507
1771/** 1508/**
@@ -1773,11 +1510,10 @@ static void clear_glock(struct gfs2_glock *gl)
1773 * @sdp: the filesystem 1510 * @sdp: the filesystem
1774 * @wait: wait until it's all gone 1511 * @wait: wait until it's all gone
1775 * 1512 *
1776 * Called when unmounting the filesystem, or when inter-node lock manager 1513 * Called when unmounting the filesystem.
1777 * requests DROPLOCKS because it is running out of capacity.
1778 */ 1514 */
1779 1515
1780void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) 1516void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1781{ 1517{
1782 unsigned long t; 1518 unsigned long t;
1783 unsigned int x; 1519 unsigned int x;
@@ -1792,7 +1528,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
1792 cont = 1; 1528 cont = 1;
1793 } 1529 }
1794 1530
1795 if (!wait || !cont) 1531 if (!cont)
1796 break; 1532 break;
1797 1533
1798 if (time_after_eq(jiffies, 1534 if (time_after_eq(jiffies,
@@ -1810,180 +1546,164 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
1810 } 1546 }
1811} 1547}
1812 1548
1813/* 1549static const char *state2str(unsigned state)
1814 * Diagnostic routines to help debug distributed deadlock
1815 */
1816
1817static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt,
1818 unsigned long address)
1819{ 1550{
1820 char buffer[KSYM_SYMBOL_LEN]; 1551 switch(state) {
1821 1552 case LM_ST_UNLOCKED:
1822 sprint_symbol(buffer, address); 1553 return "UN";
1823 print_dbg(gi, fmt, buffer); 1554 case LM_ST_SHARED:
1555 return "SH";
1556 case LM_ST_DEFERRED:
1557 return "DF";
1558 case LM_ST_EXCLUSIVE:
1559 return "EX";
1560 }
1561 return "??";
1562}
1563
1564static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
1565{
1566 char *p = buf;
1567 if (flags & LM_FLAG_TRY)
1568 *p++ = 't';
1569 if (flags & LM_FLAG_TRY_1CB)
1570 *p++ = 'T';
1571 if (flags & LM_FLAG_NOEXP)
1572 *p++ = 'e';
1573 if (flags & LM_FLAG_ANY)
1574 *p++ = 'a';
1575 if (flags & LM_FLAG_PRIORITY)
1576 *p++ = 'p';
1577 if (flags & GL_ASYNC)
1578 *p++ = 'a';
1579 if (flags & GL_EXACT)
1580 *p++ = 'E';
1581 if (flags & GL_ATIME)
1582 *p++ = 'a';
1583 if (flags & GL_NOCACHE)
1584 *p++ = 'c';
1585 if (test_bit(HIF_HOLDER, &iflags))
1586 *p++ = 'H';
1587 if (test_bit(HIF_WAIT, &iflags))
1588 *p++ = 'W';
1589 if (test_bit(HIF_FIRST, &iflags))
1590 *p++ = 'F';
1591 *p = 0;
1592 return buf;
1824} 1593}
1825 1594
1826/** 1595/**
1827 * dump_holder - print information about a glock holder 1596 * dump_holder - print information about a glock holder
1828 * @str: a string naming the type of holder 1597 * @seq: the seq_file struct
1829 * @gh: the glock holder 1598 * @gh: the glock holder
1830 * 1599 *
1831 * Returns: 0 on success, -ENOBUFS when we run out of space 1600 * Returns: 0 on success, -ENOBUFS when we run out of space
1832 */ 1601 */
1833 1602
1834static int dump_holder(struct glock_iter *gi, char *str, 1603static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1835 struct gfs2_holder *gh)
1836{ 1604{
1837 unsigned int x; 1605 struct task_struct *gh_owner = NULL;
1838 struct task_struct *gh_owner; 1606 char buffer[KSYM_SYMBOL_LEN];
1607 char flags_buf[32];
1839 1608
1840 print_dbg(gi, " %s\n", str); 1609 sprint_symbol(buffer, gh->gh_ip);
1841 if (gh->gh_owner_pid) { 1610 if (gh->gh_owner_pid)
1842 print_dbg(gi, " owner = %ld ",
1843 (long)pid_nr(gh->gh_owner_pid));
1844 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 1611 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
1845 if (gh_owner) 1612 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n",
1846 print_dbg(gi, "(%s)\n", gh_owner->comm); 1613 state2str(gh->gh_state),
1847 else 1614 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
1848 print_dbg(gi, "(ended)\n"); 1615 gh->gh_error,
1849 } else 1616 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
1850 print_dbg(gi, " owner = -1\n"); 1617 gh_owner ? gh_owner->comm : "(ended)", buffer);
1851 print_dbg(gi, " gh_state = %u\n", gh->gh_state);
1852 print_dbg(gi, " gh_flags =");
1853 for (x = 0; x < 32; x++)
1854 if (gh->gh_flags & (1 << x))
1855 print_dbg(gi, " %u", x);
1856 print_dbg(gi, " \n");
1857 print_dbg(gi, " error = %d\n", gh->gh_error);
1858 print_dbg(gi, " gh_iflags =");
1859 for (x = 0; x < 32; x++)
1860 if (test_bit(x, &gh->gh_iflags))
1861 print_dbg(gi, " %u", x);
1862 print_dbg(gi, " \n");
1863 gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip);
1864
1865 return 0; 1618 return 0;
1866} 1619}
1867 1620
1868/** 1621static const char *gflags2str(char *buf, const unsigned long *gflags)
1869 * dump_inode - print information about an inode 1622{
1870 * @ip: the inode 1623 char *p = buf;
1871 * 1624 if (test_bit(GLF_LOCK, gflags))
1872 * Returns: 0 on success, -ENOBUFS when we run out of space 1625 *p++ = 'l';
1873 */ 1626 if (test_bit(GLF_STICKY, gflags))
1874 1627 *p++ = 's';
1875static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) 1628 if (test_bit(GLF_DEMOTE, gflags))
1876{ 1629 *p++ = 'D';
1877 unsigned int x; 1630 if (test_bit(GLF_PENDING_DEMOTE, gflags))
1878 1631 *p++ = 'd';
1879 print_dbg(gi, " Inode:\n"); 1632 if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
1880 print_dbg(gi, " num = %llu/%llu\n", 1633 *p++ = 'p';
1881 (unsigned long long)ip->i_no_formal_ino, 1634 if (test_bit(GLF_DIRTY, gflags))
1882 (unsigned long long)ip->i_no_addr); 1635 *p++ = 'y';
1883 print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); 1636 if (test_bit(GLF_LFLUSH, gflags))
1884 print_dbg(gi, " i_flags ="); 1637 *p++ = 'f';
1885 for (x = 0; x < 32; x++) 1638 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
1886 if (test_bit(x, &ip->i_flags)) 1639 *p++ = 'i';
1887 print_dbg(gi, " %u", x); 1640 if (test_bit(GLF_REPLY_PENDING, gflags))
1888 print_dbg(gi, " \n"); 1641 *p++ = 'r';
1889 return 0; 1642 *p = 0;
1643 return buf;
1890} 1644}
1891 1645
1892/** 1646/**
1893 * dump_glock - print information about a glock 1647 * __dump_glock - print information about a glock
1648 * @seq: The seq_file struct
1894 * @gl: the glock 1649 * @gl: the glock
1895 * @count: where we are in the buffer 1650 *
1651 * The file format is as follows:
1652 * One line per object, capital letters are used to indicate objects
1653 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
1654 * other objects are indented by a single space and follow the glock to
1655 * which they are related. Fields are indicated by lower case letters
1656 * followed by a colon and the field value, except for strings which are in
1657 * [] so that its possible to see if they are composed of spaces for
1658 * example. The field's are n = number (id of the object), f = flags,
1659 * t = type, s = state, r = refcount, e = error, p = pid.
1896 * 1660 *
1897 * Returns: 0 on success, -ENOBUFS when we run out of space 1661 * Returns: 0 on success, -ENOBUFS when we run out of space
1898 */ 1662 */
1899 1663
1900static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) 1664static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1901{ 1665{
1902 struct gfs2_holder *gh; 1666 const struct gfs2_glock_operations *glops = gl->gl_ops;
1903 unsigned int x; 1667 unsigned long long dtime;
1904 int error = -ENOBUFS; 1668 const struct gfs2_holder *gh;
1905 struct task_struct *gl_owner; 1669 char gflags_buf[32];
1670 int error = 0;
1906 1671
1907 spin_lock(&gl->gl_spin); 1672 dtime = jiffies - gl->gl_demote_time;
1673 dtime *= 1000000/HZ; /* demote time in uSec */
1674 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1675 dtime = 0;
1676 gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu l:%d a:%d r:%d\n",
1677 state2str(gl->gl_state),
1678 gl->gl_name.ln_type,
1679 (unsigned long long)gl->gl_name.ln_number,
1680 gflags2str(gflags_buf, &gl->gl_flags),
1681 state2str(gl->gl_target),
1682 state2str(gl->gl_demote_state), dtime,
1683 atomic_read(&gl->gl_lvb_count),
1684 atomic_read(&gl->gl_ail_count),
1685 atomic_read(&gl->gl_ref));
1908 1686
1909 print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type,
1910 (unsigned long long)gl->gl_name.ln_number);
1911 print_dbg(gi, " gl_flags =");
1912 for (x = 0; x < 32; x++) {
1913 if (test_bit(x, &gl->gl_flags))
1914 print_dbg(gi, " %u", x);
1915 }
1916 if (!test_bit(GLF_LOCK, &gl->gl_flags))
1917 print_dbg(gi, " (unlocked)");
1918 print_dbg(gi, " \n");
1919 print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref));
1920 print_dbg(gi, " gl_state = %u\n", gl->gl_state);
1921 if (gl->gl_owner_pid) {
1922 gl_owner = pid_task(gl->gl_owner_pid, PIDTYPE_PID);
1923 if (gl_owner)
1924 print_dbg(gi, " gl_owner = pid %d (%s)\n",
1925 pid_nr(gl->gl_owner_pid), gl_owner->comm);
1926 else
1927 print_dbg(gi, " gl_owner = %d (ended)\n",
1928 pid_nr(gl->gl_owner_pid));
1929 } else
1930 print_dbg(gi, " gl_owner = -1\n");
1931 print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip);
1932 print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
1933 print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
1934 print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no");
1935 print_dbg(gi, " reclaim = %s\n",
1936 (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
1937 if (gl->gl_aspace)
1938 print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace,
1939 gl->gl_aspace->i_mapping->nrpages);
1940 else
1941 print_dbg(gi, " aspace = no\n");
1942 print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count));
1943 if (gl->gl_req_gh) {
1944 error = dump_holder(gi, "Request", gl->gl_req_gh);
1945 if (error)
1946 goto out;
1947 }
1948 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1687 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
1949 error = dump_holder(gi, "Holder", gh); 1688 error = dump_holder(seq, gh);
1950 if (error) 1689 if (error)
1951 goto out; 1690 goto out;
1952 } 1691 }
1953 list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { 1692 if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
1954 error = dump_holder(gi, "Waiter1", gh); 1693 error = glops->go_dump(seq, gl);
1955 if (error)
1956 goto out;
1957 }
1958 list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
1959 error = dump_holder(gi, "Waiter3", gh);
1960 if (error)
1961 goto out;
1962 }
1963 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
1964 print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n",
1965 gl->gl_demote_state, (unsigned long long)
1966 (jiffies - gl->gl_demote_time)*(1000000/HZ));
1967 }
1968 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
1969 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
1970 list_empty(&gl->gl_holders)) {
1971 error = dump_inode(gi, gl->gl_object);
1972 if (error)
1973 goto out;
1974 } else {
1975 error = -ENOBUFS;
1976 print_dbg(gi, " Inode: busy\n");
1977 }
1978 }
1979
1980 error = 0;
1981
1982out: 1694out:
1983 spin_unlock(&gl->gl_spin);
1984 return error; 1695 return error;
1985} 1696}
1986 1697
1698static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1699{
1700 int ret;
1701 spin_lock(&gl->gl_spin);
1702 ret = __dump_glock(seq, gl);
1703 spin_unlock(&gl->gl_spin);
1704 return ret;
1705}
1706
1987/** 1707/**
1988 * gfs2_dump_lockstate - print out the current lockstate 1708 * gfs2_dump_lockstate - print out the current lockstate
1989 * @sdp: the filesystem 1709 * @sdp: the filesystem
@@ -2086,7 +1806,7 @@ void gfs2_glock_exit(void)
2086module_param(scand_secs, uint, S_IRUGO|S_IWUSR); 1806module_param(scand_secs, uint, S_IRUGO|S_IWUSR);
2087MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); 1807MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs");
2088 1808
2089static int gfs2_glock_iter_next(struct glock_iter *gi) 1809static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
2090{ 1810{
2091 struct gfs2_glock *gl; 1811 struct gfs2_glock *gl;
2092 1812
@@ -2104,7 +1824,7 @@ restart:
2104 gfs2_glock_put(gl); 1824 gfs2_glock_put(gl);
2105 if (gl && gi->gl == NULL) 1825 if (gl && gi->gl == NULL)
2106 gi->hash++; 1826 gi->hash++;
2107 while(gi->gl == NULL) { 1827 while (gi->gl == NULL) {
2108 if (gi->hash >= GFS2_GL_HASH_SIZE) 1828 if (gi->hash >= GFS2_GL_HASH_SIZE)
2109 return 1; 1829 return 1;
2110 read_lock(gl_lock_addr(gi->hash)); 1830 read_lock(gl_lock_addr(gi->hash));
@@ -2122,58 +1842,34 @@ restart:
2122 return 0; 1842 return 0;
2123} 1843}
2124 1844
2125static void gfs2_glock_iter_free(struct glock_iter *gi) 1845static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
2126{ 1846{
2127 if (gi->gl) 1847 if (gi->gl)
2128 gfs2_glock_put(gi->gl); 1848 gfs2_glock_put(gi->gl);
2129 kfree(gi);
2130}
2131
2132static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp)
2133{
2134 struct glock_iter *gi;
2135
2136 gi = kmalloc(sizeof (*gi), GFP_KERNEL);
2137 if (!gi)
2138 return NULL;
2139
2140 gi->sdp = sdp;
2141 gi->hash = 0;
2142 gi->seq = NULL;
2143 gi->gl = NULL; 1849 gi->gl = NULL;
2144 memset(gi->string, 0, sizeof(gi->string));
2145
2146 if (gfs2_glock_iter_next(gi)) {
2147 gfs2_glock_iter_free(gi);
2148 return NULL;
2149 }
2150
2151 return gi;
2152} 1850}
2153 1851
2154static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) 1852static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
2155{ 1853{
2156 struct glock_iter *gi; 1854 struct gfs2_glock_iter *gi = seq->private;
2157 loff_t n = *pos; 1855 loff_t n = *pos;
2158 1856
2159 gi = gfs2_glock_iter_init(file->private); 1857 gi->hash = 0;
2160 if (!gi)
2161 return NULL;
2162 1858
2163 while(n--) { 1859 do {
2164 if (gfs2_glock_iter_next(gi)) { 1860 if (gfs2_glock_iter_next(gi)) {
2165 gfs2_glock_iter_free(gi); 1861 gfs2_glock_iter_free(gi);
2166 return NULL; 1862 return NULL;
2167 } 1863 }
2168 } 1864 } while (n--);
2169 1865
2170 return gi; 1866 return gi->gl;
2171} 1867}
2172 1868
2173static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, 1869static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
2174 loff_t *pos) 1870 loff_t *pos)
2175{ 1871{
2176 struct glock_iter *gi = iter_ptr; 1872 struct gfs2_glock_iter *gi = seq->private;
2177 1873
2178 (*pos)++; 1874 (*pos)++;
2179 1875
@@ -2182,24 +1878,18 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr,
2182 return NULL; 1878 return NULL;
2183 } 1879 }
2184 1880
2185 return gi; 1881 return gi->gl;
2186} 1882}
2187 1883
2188static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) 1884static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
2189{ 1885{
2190 struct glock_iter *gi = iter_ptr; 1886 struct gfs2_glock_iter *gi = seq->private;
2191 if (gi) 1887 gfs2_glock_iter_free(gi);
2192 gfs2_glock_iter_free(gi);
2193} 1888}
2194 1889
2195static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) 1890static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
2196{ 1891{
2197 struct glock_iter *gi = iter_ptr; 1892 return dump_glock(seq, iter_ptr);
2198
2199 gi->seq = file;
2200 dump_glock(gi, gi->gl);
2201
2202 return 0;
2203} 1893}
2204 1894
2205static const struct seq_operations gfs2_glock_seq_ops = { 1895static const struct seq_operations gfs2_glock_seq_ops = {
@@ -2211,17 +1901,14 @@ static const struct seq_operations gfs2_glock_seq_ops = {
2211 1901
2212static int gfs2_debugfs_open(struct inode *inode, struct file *file) 1902static int gfs2_debugfs_open(struct inode *inode, struct file *file)
2213{ 1903{
2214 struct seq_file *seq; 1904 int ret = seq_open_private(file, &gfs2_glock_seq_ops,
2215 int ret; 1905 sizeof(struct gfs2_glock_iter));
2216 1906 if (ret == 0) {
2217 ret = seq_open(file, &gfs2_glock_seq_ops); 1907 struct seq_file *seq = file->private_data;
2218 if (ret) 1908 struct gfs2_glock_iter *gi = seq->private;
2219 return ret; 1909 gi->sdp = inode->i_private;
2220 1910 }
2221 seq = file->private_data; 1911 return ret;
2222 seq->private = inode->i_private;
2223
2224 return 0;
2225} 1912}
2226 1913
2227static const struct file_operations gfs2_debug_fops = { 1914static const struct file_operations gfs2_debug_fops = {
@@ -2229,7 +1916,7 @@ static const struct file_operations gfs2_debug_fops = {
2229 .open = gfs2_debugfs_open, 1916 .open = gfs2_debugfs_open,
2230 .read = seq_read, 1917 .read = seq_read,
2231 .llseek = seq_lseek, 1918 .llseek = seq_lseek,
2232 .release = seq_release 1919 .release = seq_release_private,
2233}; 1920};
2234 1921
2235int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) 1922int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index cdad3e6f8150..971d92af70fc 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -26,11 +26,8 @@
26#define GL_SKIP 0x00000100 26#define GL_SKIP 0x00000100
27#define GL_ATIME 0x00000200 27#define GL_ATIME 0x00000200
28#define GL_NOCACHE 0x00000400 28#define GL_NOCACHE 0x00000400
29#define GL_FLOCK 0x00000800
30#define GL_NOCANCEL 0x00001000
31 29
32#define GLR_TRYFAILED 13 30#define GLR_TRYFAILED 13
33#define GLR_CANCELED 14
34 31
35static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) 32static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
36{ 33{
@@ -41,6 +38,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
41 spin_lock(&gl->gl_spin); 38 spin_lock(&gl->gl_spin);
42 pid = task_pid(current); 39 pid = task_pid(current);
43 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 40 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
41 if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
42 break;
44 if (gh->gh_owner_pid == pid) 43 if (gh->gh_owner_pid == pid)
45 goto out; 44 goto out;
46 } 45 }
@@ -70,7 +69,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
70{ 69{
71 int ret; 70 int ret;
72 spin_lock(&gl->gl_spin); 71 spin_lock(&gl->gl_spin);
73 ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3); 72 ret = test_bit(GLF_DEMOTE, &gl->gl_flags);
74 spin_unlock(&gl->gl_spin); 73 spin_unlock(&gl->gl_spin);
75 return ret; 74 return ret;
76} 75}
@@ -98,6 +97,7 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
98int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); 97int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
99void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 98void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
100void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 99void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
100void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
101 101
102/** 102/**
103 * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock 103 * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
@@ -130,10 +130,9 @@ int gfs2_lvb_hold(struct gfs2_glock *gl);
130void gfs2_lvb_unhold(struct gfs2_glock *gl); 130void gfs2_lvb_unhold(struct gfs2_glock *gl);
131 131
132void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); 132void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
133
134void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); 133void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
135void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 134void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
136void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); 135void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
137 136
138int __init gfs2_glock_init(void); 137int __init gfs2_glock_init(void);
139void gfs2_glock_exit(void); 138void gfs2_glock_exit(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 07d84d16cda4..c6c318c2a0f6 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -13,6 +13,7 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/lm_interface.h> 15#include <linux/lm_interface.h>
16#include <linux/bio.h>
16 17
17#include "gfs2.h" 18#include "gfs2.h"
18#include "incore.h" 19#include "incore.h"
@@ -172,26 +173,6 @@ static void inode_go_sync(struct gfs2_glock *gl)
172} 173}
173 174
174/** 175/**
175 * inode_go_xmote_bh - After promoting/demoting a glock
176 * @gl: the glock
177 *
178 */
179
180static void inode_go_xmote_bh(struct gfs2_glock *gl)
181{
182 struct gfs2_holder *gh = gl->gl_req_gh;
183 struct buffer_head *bh;
184 int error;
185
186 if (gl->gl_state != LM_ST_UNLOCKED &&
187 (!gh || !(gh->gh_flags & GL_SKIP))) {
188 error = gfs2_meta_read(gl, gl->gl_name.ln_number, 0, &bh);
189 if (!error)
190 brelse(bh);
191 }
192}
193
194/**
195 * inode_go_inval - prepare a inode glock to be released 176 * inode_go_inval - prepare a inode glock to be released
196 * @gl: the glock 177 * @gl: the glock
197 * @flags: 178 * @flags:
@@ -267,6 +248,26 @@ static int inode_go_lock(struct gfs2_holder *gh)
267} 248}
268 249
269/** 250/**
251 * inode_go_dump - print information about an inode
252 * @seq: The iterator
253 * @ip: the inode
254 *
255 * Returns: 0 on success, -ENOBUFS when we run out of space
256 */
257
258static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
259{
260 const struct gfs2_inode *ip = gl->gl_object;
261 if (ip == NULL)
262 return 0;
263 gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%08lx\n",
264 (unsigned long long)ip->i_no_formal_ino,
265 (unsigned long long)ip->i_no_addr,
266 IF2DT(ip->i_inode.i_mode), ip->i_flags);
267 return 0;
268}
269
270/**
270 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock 271 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
271 * @gl: the glock 272 * @gl: the glock
272 * 273 *
@@ -306,6 +307,22 @@ static void rgrp_go_unlock(struct gfs2_holder *gh)
306} 307}
307 308
308/** 309/**
310 * rgrp_go_dump - print out an rgrp
311 * @seq: The iterator
312 * @gl: The glock in question
313 *
314 */
315
316static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
317{
318 const struct gfs2_rgrpd *rgd = gl->gl_object;
319 if (rgd == NULL)
320 return 0;
321 gfs2_print_dbg(seq, " R: n:%llu\n", (unsigned long long)rgd->rd_addr);
322 return 0;
323}
324
325/**
309 * trans_go_sync - promote/demote the transaction glock 326 * trans_go_sync - promote/demote the transaction glock
310 * @gl: the glock 327 * @gl: the glock
311 * @state: the requested state 328 * @state: the requested state
@@ -330,7 +347,7 @@ static void trans_go_sync(struct gfs2_glock *gl)
330 * 347 *
331 */ 348 */
332 349
333static void trans_go_xmote_bh(struct gfs2_glock *gl) 350static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
334{ 351{
335 struct gfs2_sbd *sdp = gl->gl_sbd; 352 struct gfs2_sbd *sdp = gl->gl_sbd;
336 struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); 353 struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
@@ -338,8 +355,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
338 struct gfs2_log_header_host head; 355 struct gfs2_log_header_host head;
339 int error; 356 int error;
340 357
341 if (gl->gl_state != LM_ST_UNLOCKED && 358 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
342 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
343 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); 359 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
344 360
345 error = gfs2_find_jhead(sdp->sd_jdesc, &head); 361 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
@@ -354,6 +370,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
354 gfs2_log_pointers_init(sdp, head.lh_blkno); 370 gfs2_log_pointers_init(sdp, head.lh_blkno);
355 } 371 }
356 } 372 }
373 return 0;
357} 374}
358 375
359/** 376/**
@@ -375,12 +392,12 @@ const struct gfs2_glock_operations gfs2_meta_glops = {
375 392
376const struct gfs2_glock_operations gfs2_inode_glops = { 393const struct gfs2_glock_operations gfs2_inode_glops = {
377 .go_xmote_th = inode_go_sync, 394 .go_xmote_th = inode_go_sync,
378 .go_xmote_bh = inode_go_xmote_bh,
379 .go_inval = inode_go_inval, 395 .go_inval = inode_go_inval,
380 .go_demote_ok = inode_go_demote_ok, 396 .go_demote_ok = inode_go_demote_ok,
381 .go_lock = inode_go_lock, 397 .go_lock = inode_go_lock,
398 .go_dump = inode_go_dump,
382 .go_type = LM_TYPE_INODE, 399 .go_type = LM_TYPE_INODE,
383 .go_min_hold_time = HZ / 10, 400 .go_min_hold_time = HZ / 5,
384}; 401};
385 402
386const struct gfs2_glock_operations gfs2_rgrp_glops = { 403const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -389,8 +406,9 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
389 .go_demote_ok = rgrp_go_demote_ok, 406 .go_demote_ok = rgrp_go_demote_ok,
390 .go_lock = rgrp_go_lock, 407 .go_lock = rgrp_go_lock,
391 .go_unlock = rgrp_go_unlock, 408 .go_unlock = rgrp_go_unlock,
409 .go_dump = rgrp_go_dump,
392 .go_type = LM_TYPE_RGRP, 410 .go_type = LM_TYPE_RGRP,
393 .go_min_hold_time = HZ / 10, 411 .go_min_hold_time = HZ / 5,
394}; 412};
395 413
396const struct gfs2_glock_operations gfs2_trans_glops = { 414const struct gfs2_glock_operations gfs2_trans_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index eabe5eac41da..448697a5c462 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -77,7 +77,6 @@ struct gfs2_rgrp_host {
77struct gfs2_rgrpd { 77struct gfs2_rgrpd {
78 struct list_head rd_list; /* Link with superblock */ 78 struct list_head rd_list; /* Link with superblock */
79 struct list_head rd_list_mru; 79 struct list_head rd_list_mru;
80 struct list_head rd_recent; /* Recently used rgrps */
81 struct gfs2_glock *rd_gl; /* Glock for this rgrp */ 80 struct gfs2_glock *rd_gl; /* Glock for this rgrp */
82 u64 rd_addr; /* grp block disk address */ 81 u64 rd_addr; /* grp block disk address */
83 u64 rd_data0; /* first data location */ 82 u64 rd_data0; /* first data location */
@@ -128,20 +127,20 @@ struct gfs2_bufdata {
128 127
129struct gfs2_glock_operations { 128struct gfs2_glock_operations {
130 void (*go_xmote_th) (struct gfs2_glock *gl); 129 void (*go_xmote_th) (struct gfs2_glock *gl);
131 void (*go_xmote_bh) (struct gfs2_glock *gl); 130 int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
132 void (*go_inval) (struct gfs2_glock *gl, int flags); 131 void (*go_inval) (struct gfs2_glock *gl, int flags);
133 int (*go_demote_ok) (struct gfs2_glock *gl); 132 int (*go_demote_ok) (struct gfs2_glock *gl);
134 int (*go_lock) (struct gfs2_holder *gh); 133 int (*go_lock) (struct gfs2_holder *gh);
135 void (*go_unlock) (struct gfs2_holder *gh); 134 void (*go_unlock) (struct gfs2_holder *gh);
135 int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
136 const int go_type; 136 const int go_type;
137 const unsigned long go_min_hold_time; 137 const unsigned long go_min_hold_time;
138}; 138};
139 139
140enum { 140enum {
141 /* States */ 141 /* States */
142 HIF_HOLDER = 6, 142 HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
143 HIF_FIRST = 7, 143 HIF_FIRST = 7,
144 HIF_ABORTED = 9,
145 HIF_WAIT = 10, 144 HIF_WAIT = 10,
146}; 145};
147 146
@@ -154,20 +153,20 @@ struct gfs2_holder {
154 unsigned gh_flags; 153 unsigned gh_flags;
155 154
156 int gh_error; 155 int gh_error;
157 unsigned long gh_iflags; 156 unsigned long gh_iflags; /* HIF_... */
158 unsigned long gh_ip; 157 unsigned long gh_ip;
159}; 158};
160 159
161enum { 160enum {
162 GLF_LOCK = 1, 161 GLF_LOCK = 1,
163 GLF_STICKY = 2, 162 GLF_STICKY = 2,
164 GLF_DEMOTE = 3, 163 GLF_DEMOTE = 3,
165 GLF_PENDING_DEMOTE = 4, 164 GLF_PENDING_DEMOTE = 4,
166 GLF_DIRTY = 5, 165 GLF_DEMOTE_IN_PROGRESS = 5,
167 GLF_DEMOTE_IN_PROGRESS = 6, 166 GLF_DIRTY = 6,
168 GLF_LFLUSH = 7, 167 GLF_LFLUSH = 7,
169 GLF_WAITERS2 = 8, 168 GLF_INVALIDATE_IN_PROGRESS = 8,
170 GLF_CONV_DEADLK = 9, 169 GLF_REPLY_PENDING = 9,
171}; 170};
172 171
173struct gfs2_glock { 172struct gfs2_glock {
@@ -179,19 +178,14 @@ struct gfs2_glock {
179 spinlock_t gl_spin; 178 spinlock_t gl_spin;
180 179
181 unsigned int gl_state; 180 unsigned int gl_state;
181 unsigned int gl_target;
182 unsigned int gl_reply;
182 unsigned int gl_hash; 183 unsigned int gl_hash;
183 unsigned int gl_demote_state; /* state requested by remote node */ 184 unsigned int gl_demote_state; /* state requested by remote node */
184 unsigned long gl_demote_time; /* time of first demote request */ 185 unsigned long gl_demote_time; /* time of first demote request */
185 struct pid *gl_owner_pid;
186 unsigned long gl_ip;
187 struct list_head gl_holders; 186 struct list_head gl_holders;
188 struct list_head gl_waiters1; /* HIF_MUTEX */
189 struct list_head gl_waiters3; /* HIF_PROMOTE */
190 187
191 const struct gfs2_glock_operations *gl_ops; 188 const struct gfs2_glock_operations *gl_ops;
192
193 struct gfs2_holder *gl_req_gh;
194
195 void *gl_lock; 189 void *gl_lock;
196 char *gl_lvb; 190 char *gl_lvb;
197 atomic_t gl_lvb_count; 191 atomic_t gl_lvb_count;
@@ -427,7 +421,6 @@ struct gfs2_tune {
427 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ 421 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
428 unsigned int gt_atime_quantum; /* Min secs between atime updates */ 422 unsigned int gt_atime_quantum; /* Min secs between atime updates */
429 unsigned int gt_new_files_jdata; 423 unsigned int gt_new_files_jdata;
430 unsigned int gt_new_files_directio;
431 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ 424 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
432 unsigned int gt_stall_secs; /* Detects trouble! */ 425 unsigned int gt_stall_secs; /* Detects trouble! */
433 unsigned int gt_complain_secs; 426 unsigned int gt_complain_secs;
@@ -534,7 +527,6 @@ struct gfs2_sbd {
534 struct mutex sd_rindex_mutex; 527 struct mutex sd_rindex_mutex;
535 struct list_head sd_rindex_list; 528 struct list_head sd_rindex_list;
536 struct list_head sd_rindex_mru_list; 529 struct list_head sd_rindex_mru_list;
537 struct list_head sd_rindex_recent_list;
538 struct gfs2_rgrpd *sd_rindex_forward; 530 struct gfs2_rgrpd *sd_rindex_forward;
539 unsigned int sd_rgrps; 531 unsigned int sd_rgrps;
540 532
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 09453d057e41..6da0ab355b8a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -504,7 +504,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
504 } 504 }
505 505
506 if (!is_root) { 506 if (!is_root) {
507 error = permission(dir, MAY_EXEC, NULL); 507 error = gfs2_permission(dir, MAY_EXEC);
508 if (error) 508 if (error)
509 goto out; 509 goto out;
510 } 510 }
@@ -667,7 +667,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
667{ 667{
668 int error; 668 int error;
669 669
670 error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); 670 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
671 if (error) 671 if (error)
672 return error; 672 return error;
673 673
@@ -789,13 +789,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
789 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || 789 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
790 gfs2_tune_get(sdp, gt_new_files_jdata)) 790 gfs2_tune_get(sdp, gt_new_files_jdata))
791 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); 791 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
792 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
793 gfs2_tune_get(sdp, gt_new_files_directio))
794 di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO);
795 } else if (S_ISDIR(mode)) { 792 } else if (S_ISDIR(mode)) {
796 di->di_flags |= cpu_to_be32(dip->i_di.di_flags & 793 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
797 GFS2_DIF_INHERIT_DIRECTIO);
798 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
799 GFS2_DIF_INHERIT_JDATA); 794 GFS2_DIF_INHERIT_JDATA);
800 } 795 }
801 796
@@ -1134,7 +1129,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
1134 if (IS_APPEND(&dip->i_inode)) 1129 if (IS_APPEND(&dip->i_inode))
1135 return -EPERM; 1130 return -EPERM;
1136 1131
1137 error = permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, NULL); 1132 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
1138 if (error) 1133 if (error)
1139 return error; 1134 return error;
1140 1135
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 580da454b38f..6074c2506f75 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -72,7 +72,6 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
72} 72}
73 73
74 74
75void gfs2_inode_attr_in(struct gfs2_inode *ip);
76void gfs2_set_iop(struct inode *inode); 75void gfs2_set_iop(struct inode *inode);
77struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 76struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
78 u64 no_addr, u64 no_formal_ino, 77 u64 no_addr, u64 no_formal_ino,
@@ -91,6 +90,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
91 struct gfs2_inode *ip); 90 struct gfs2_inode *ip);
92int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 91int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
93 const struct gfs2_inode *ip); 92 const struct gfs2_inode *ip);
93int gfs2_permission(struct inode *inode, int mask);
94int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); 94int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
95int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); 95int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
96int gfs2_glock_nq_atime(struct gfs2_holder *gh); 96int gfs2_glock_nq_atime(struct gfs2_holder *gh);
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
index 663fee728783..523243a13a21 100644
--- a/fs/gfs2/locking.c
+++ b/fs/gfs2/locking.c
@@ -23,12 +23,54 @@ struct lmh_wrapper {
23 const struct lm_lockops *lw_ops; 23 const struct lm_lockops *lw_ops;
24}; 24};
25 25
26static int nolock_mount(char *table_name, char *host_data,
27 lm_callback_t cb, void *cb_data,
28 unsigned int min_lvb_size, int flags,
29 struct lm_lockstruct *lockstruct,
30 struct kobject *fskobj);
31
26/* List of registered low-level locking protocols. A file system selects one 32/* List of registered low-level locking protocols. A file system selects one
27 of them by name at mount time, e.g. lock_nolock, lock_dlm. */ 33 of them by name at mount time, e.g. lock_nolock, lock_dlm. */
28 34
35static const struct lm_lockops nolock_ops = {
36 .lm_proto_name = "lock_nolock",
37 .lm_mount = nolock_mount,
38};
39
40static struct lmh_wrapper nolock_proto = {
41 .lw_list = LIST_HEAD_INIT(nolock_proto.lw_list),
42 .lw_ops = &nolock_ops,
43};
44
29static LIST_HEAD(lmh_list); 45static LIST_HEAD(lmh_list);
30static DEFINE_MUTEX(lmh_lock); 46static DEFINE_MUTEX(lmh_lock);
31 47
48static int nolock_mount(char *table_name, char *host_data,
49 lm_callback_t cb, void *cb_data,
50 unsigned int min_lvb_size, int flags,
51 struct lm_lockstruct *lockstruct,
52 struct kobject *fskobj)
53{
54 char *c;
55 unsigned int jid;
56
57 c = strstr(host_data, "jid=");
58 if (!c)
59 jid = 0;
60 else {
61 c += 4;
62 sscanf(c, "%u", &jid);
63 }
64
65 lockstruct->ls_jid = jid;
66 lockstruct->ls_first = 1;
67 lockstruct->ls_lvb_size = min_lvb_size;
68 lockstruct->ls_ops = &nolock_ops;
69 lockstruct->ls_flags = LM_LSFLAG_LOCAL;
70
71 return 0;
72}
73
32/** 74/**
33 * gfs2_register_lockproto - Register a low-level locking protocol 75 * gfs2_register_lockproto - Register a low-level locking protocol
34 * @proto: the protocol definition 76 * @proto: the protocol definition
@@ -116,9 +158,13 @@ int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
116 int try = 0; 158 int try = 0;
117 int error, found; 159 int error, found;
118 160
161
119retry: 162retry:
120 mutex_lock(&lmh_lock); 163 mutex_lock(&lmh_lock);
121 164
165 if (list_empty(&nolock_proto.lw_list))
166 list_add(&nolock_proto.lw_list, &lmh_list);
167
122 found = 0; 168 found = 0;
123 list_for_each_entry(lw, &lmh_list, lw_list) { 169 list_for_each_entry(lw, &lmh_list, lw_list) {
124 if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) { 170 if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
@@ -139,7 +185,8 @@ retry:
139 goto out; 185 goto out;
140 } 186 }
141 187
142 if (!try_module_get(lw->lw_ops->lm_owner)) { 188 if (lw->lw_ops->lm_owner &&
189 !try_module_get(lw->lw_ops->lm_owner)) {
143 try = 0; 190 try = 0;
144 mutex_unlock(&lmh_lock); 191 mutex_unlock(&lmh_lock);
145 msleep(1000); 192 msleep(1000);
@@ -158,7 +205,8 @@ out:
158void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct) 205void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
159{ 206{
160 mutex_lock(&lmh_lock); 207 mutex_lock(&lmh_lock);
161 lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace); 208 if (lockstruct->ls_ops->lm_unmount)
209 lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
162 if (lockstruct->ls_ops->lm_owner) 210 if (lockstruct->ls_ops->lm_owner)
163 module_put(lockstruct->ls_ops->lm_owner); 211 module_put(lockstruct->ls_ops->lm_owner);
164 mutex_unlock(&lmh_lock); 212 mutex_unlock(&lmh_lock);
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
index cf7ea8abec87..2482c9047505 100644
--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -11,46 +11,60 @@
11 11
12static char junk_lvb[GDLM_LVB_SIZE]; 12static char junk_lvb[GDLM_LVB_SIZE];
13 13
14static void queue_complete(struct gdlm_lock *lp) 14
15/* convert dlm lock-mode to gfs lock-state */
16
17static s16 gdlm_make_lmstate(s16 dlmmode)
15{ 18{
16 struct gdlm_ls *ls = lp->ls; 19 switch (dlmmode) {
20 case DLM_LOCK_IV:
21 case DLM_LOCK_NL:
22 return LM_ST_UNLOCKED;
23 case DLM_LOCK_EX:
24 return LM_ST_EXCLUSIVE;
25 case DLM_LOCK_CW:
26 return LM_ST_DEFERRED;
27 case DLM_LOCK_PR:
28 return LM_ST_SHARED;
29 }
30 gdlm_assert(0, "unknown DLM mode %d", dlmmode);
31 return -1;
32}
17 33
18 clear_bit(LFL_ACTIVE, &lp->flags); 34/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
35 thread gets to it. */
36
37static void queue_submit(struct gdlm_lock *lp)
38{
39 struct gdlm_ls *ls = lp->ls;
19 40
20 spin_lock(&ls->async_lock); 41 spin_lock(&ls->async_lock);
21 list_add_tail(&lp->clist, &ls->complete); 42 list_add_tail(&lp->delay_list, &ls->submit);
22 spin_unlock(&ls->async_lock); 43 spin_unlock(&ls->async_lock);
23 wake_up(&ls->thread_wait); 44 wake_up(&ls->thread_wait);
24} 45}
25 46
26static inline void gdlm_ast(void *astarg) 47static void wake_up_ast(struct gdlm_lock *lp)
27{ 48{
28 queue_complete(astarg); 49 clear_bit(LFL_AST_WAIT, &lp->flags);
50 smp_mb__after_clear_bit();
51 wake_up_bit(&lp->flags, LFL_AST_WAIT);
29} 52}
30 53
31static inline void gdlm_bast(void *astarg, int mode) 54static void gdlm_delete_lp(struct gdlm_lock *lp)
32{ 55{
33 struct gdlm_lock *lp = astarg;
34 struct gdlm_ls *ls = lp->ls; 56 struct gdlm_ls *ls = lp->ls;
35 57
36 if (!mode) {
37 printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
38 lp->lockname.ln_type,
39 (unsigned long long)lp->lockname.ln_number);
40 return;
41 }
42
43 spin_lock(&ls->async_lock); 58 spin_lock(&ls->async_lock);
44 if (!lp->bast_mode) { 59 if (!list_empty(&lp->delay_list))
45 list_add_tail(&lp->blist, &ls->blocking); 60 list_del_init(&lp->delay_list);
46 lp->bast_mode = mode; 61 ls->all_locks_count--;
47 } else if (lp->bast_mode < mode)
48 lp->bast_mode = mode;
49 spin_unlock(&ls->async_lock); 62 spin_unlock(&ls->async_lock);
50 wake_up(&ls->thread_wait); 63
64 kfree(lp);
51} 65}
52 66
53void gdlm_queue_delayed(struct gdlm_lock *lp) 67static void gdlm_queue_delayed(struct gdlm_lock *lp)
54{ 68{
55 struct gdlm_ls *ls = lp->ls; 69 struct gdlm_ls *ls = lp->ls;
56 70
@@ -59,6 +73,236 @@ void gdlm_queue_delayed(struct gdlm_lock *lp)
59 spin_unlock(&ls->async_lock); 73 spin_unlock(&ls->async_lock);
60} 74}
61 75
76static void process_complete(struct gdlm_lock *lp)
77{
78 struct gdlm_ls *ls = lp->ls;
79 struct lm_async_cb acb;
80
81 memset(&acb, 0, sizeof(acb));
82
83 if (lp->lksb.sb_status == -DLM_ECANCEL) {
84 log_info("complete dlm cancel %x,%llx flags %lx",
85 lp->lockname.ln_type,
86 (unsigned long long)lp->lockname.ln_number,
87 lp->flags);
88
89 lp->req = lp->cur;
90 acb.lc_ret |= LM_OUT_CANCELED;
91 if (lp->cur == DLM_LOCK_IV)
92 lp->lksb.sb_lkid = 0;
93 goto out;
94 }
95
96 if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
97 if (lp->lksb.sb_status != -DLM_EUNLOCK) {
98 log_info("unlock sb_status %d %x,%llx flags %lx",
99 lp->lksb.sb_status, lp->lockname.ln_type,
100 (unsigned long long)lp->lockname.ln_number,
101 lp->flags);
102 return;
103 }
104
105 lp->cur = DLM_LOCK_IV;
106 lp->req = DLM_LOCK_IV;
107 lp->lksb.sb_lkid = 0;
108
109 if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
110 gdlm_delete_lp(lp);
111 return;
112 }
113 goto out;
114 }
115
116 if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
117 memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
118
119 if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
120 if (lp->req == DLM_LOCK_PR)
121 lp->req = DLM_LOCK_CW;
122 else if (lp->req == DLM_LOCK_CW)
123 lp->req = DLM_LOCK_PR;
124 }
125
126 /*
127 * A canceled lock request. The lock was just taken off the delayed
128 * list and was never even submitted to dlm.
129 */
130
131 if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
132 log_info("complete internal cancel %x,%llx",
133 lp->lockname.ln_type,
134 (unsigned long long)lp->lockname.ln_number);
135 lp->req = lp->cur;
136 acb.lc_ret |= LM_OUT_CANCELED;
137 goto out;
138 }
139
140 /*
 141	 * An error occurred.
142 */
143
144 if (lp->lksb.sb_status) {
145 /* a "normal" error */
146 if ((lp->lksb.sb_status == -EAGAIN) &&
147 (lp->lkf & DLM_LKF_NOQUEUE)) {
148 lp->req = lp->cur;
149 if (lp->cur == DLM_LOCK_IV)
150 lp->lksb.sb_lkid = 0;
151 goto out;
152 }
153
154 /* this could only happen with cancels I think */
155 log_info("ast sb_status %d %x,%llx flags %lx",
156 lp->lksb.sb_status, lp->lockname.ln_type,
157 (unsigned long long)lp->lockname.ln_number,
158 lp->flags);
159 return;
160 }
161
162 /*
163 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
164 */
165
166 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
167 wake_up_ast(lp);
168 return;
169 }
170
171 /*
172 * A lock has been demoted to NL because it initially completed during
173 * BLOCK_LOCKS. Now it must be requested in the originally requested
174 * mode.
175 */
176
177 if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
178 gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
179 lp->lockname.ln_type,
180 (unsigned long long)lp->lockname.ln_number);
181 gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
182 lp->lockname.ln_type,
183 (unsigned long long)lp->lockname.ln_number);
184
185 lp->cur = DLM_LOCK_NL;
186 lp->req = lp->prev_req;
187 lp->prev_req = DLM_LOCK_IV;
188 lp->lkf &= ~DLM_LKF_CONVDEADLK;
189
190 set_bit(LFL_NOCACHE, &lp->flags);
191
192 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
193 !test_bit(LFL_NOBLOCK, &lp->flags))
194 gdlm_queue_delayed(lp);
195 else
196 queue_submit(lp);
197 return;
198 }
199
200 /*
201 * A request is granted during dlm recovery. It may be granted
202 * because the locks of a failed node were cleared. In that case,
203 * there may be inconsistent data beneath this lock and we must wait
204 * for recovery to complete to use it. When gfs recovery is done this
205 * granted lock will be converted to NL and then reacquired in this
206 * granted state.
207 */
208
209 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
210 !test_bit(LFL_NOBLOCK, &lp->flags) &&
211 lp->req != DLM_LOCK_NL) {
212
213 lp->cur = lp->req;
214 lp->prev_req = lp->req;
215 lp->req = DLM_LOCK_NL;
216 lp->lkf |= DLM_LKF_CONVERT;
217 lp->lkf &= ~DLM_LKF_CONVDEADLK;
218
219 log_debug("rereq %x,%llx id %x %d,%d",
220 lp->lockname.ln_type,
221 (unsigned long long)lp->lockname.ln_number,
222 lp->lksb.sb_lkid, lp->cur, lp->req);
223
224 set_bit(LFL_REREQUEST, &lp->flags);
225 queue_submit(lp);
226 return;
227 }
228
229 /*
230 * DLM demoted the lock to NL before it was granted so GFS must be
231 * told it cannot cache data for this lock.
232 */
233
234 if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
235 set_bit(LFL_NOCACHE, &lp->flags);
236
237out:
238 /*
239 * This is an internal lock_dlm lock
240 */
241
242 if (test_bit(LFL_INLOCK, &lp->flags)) {
243 clear_bit(LFL_NOBLOCK, &lp->flags);
244 lp->cur = lp->req;
245 wake_up_ast(lp);
246 return;
247 }
248
249 /*
250 * Normal completion of a lock request. Tell GFS it now has the lock.
251 */
252
253 clear_bit(LFL_NOBLOCK, &lp->flags);
254 lp->cur = lp->req;
255
256 acb.lc_name = lp->lockname;
257 acb.lc_ret |= gdlm_make_lmstate(lp->cur);
258
259 ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
260}
261
262static void gdlm_ast(void *astarg)
263{
264 struct gdlm_lock *lp = astarg;
265 clear_bit(LFL_ACTIVE, &lp->flags);
266 process_complete(lp);
267}
268
269static void process_blocking(struct gdlm_lock *lp, int bast_mode)
270{
271 struct gdlm_ls *ls = lp->ls;
272 unsigned int cb = 0;
273
274 switch (gdlm_make_lmstate(bast_mode)) {
275 case LM_ST_EXCLUSIVE:
276 cb = LM_CB_NEED_E;
277 break;
278 case LM_ST_DEFERRED:
279 cb = LM_CB_NEED_D;
280 break;
281 case LM_ST_SHARED:
282 cb = LM_CB_NEED_S;
283 break;
284 default:
285 gdlm_assert(0, "unknown bast mode %u", bast_mode);
286 }
287
288 ls->fscb(ls->sdp, cb, &lp->lockname);
289}
290
291
292static void gdlm_bast(void *astarg, int mode)
293{
294 struct gdlm_lock *lp = astarg;
295
296 if (!mode) {
297 printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
298 lp->lockname.ln_type,
299 (unsigned long long)lp->lockname.ln_number);
300 return;
301 }
302
303 process_blocking(lp, mode);
304}
305
62/* convert gfs lock-state to dlm lock-mode */ 306/* convert gfs lock-state to dlm lock-mode */
63 307
64static s16 make_mode(s16 lmstate) 308static s16 make_mode(s16 lmstate)
@@ -77,24 +321,6 @@ static s16 make_mode(s16 lmstate)
77 return -1; 321 return -1;
78} 322}
79 323
80/* convert dlm lock-mode to gfs lock-state */
81
82s16 gdlm_make_lmstate(s16 dlmmode)
83{
84 switch (dlmmode) {
85 case DLM_LOCK_IV:
86 case DLM_LOCK_NL:
87 return LM_ST_UNLOCKED;
88 case DLM_LOCK_EX:
89 return LM_ST_EXCLUSIVE;
90 case DLM_LOCK_CW:
91 return LM_ST_DEFERRED;
92 case DLM_LOCK_PR:
93 return LM_ST_SHARED;
94 }
95 gdlm_assert(0, "unknown DLM mode %d", dlmmode);
96 return -1;
97}
98 324
99/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and 325/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
100 DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */ 326 DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
@@ -134,14 +360,6 @@ static inline unsigned int make_flags(struct gdlm_lock *lp,
134 360
135 if (lp->lksb.sb_lkid != 0) { 361 if (lp->lksb.sb_lkid != 0) {
136 lkf |= DLM_LKF_CONVERT; 362 lkf |= DLM_LKF_CONVERT;
137
138 /* Conversion deadlock avoidance by DLM */
139
140 if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) &&
141 !test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
142 !(lkf & DLM_LKF_NOQUEUE) &&
143 cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
144 lkf |= DLM_LKF_CONVDEADLK;
145 } 363 }
146 364
147 if (lp->lvb) 365 if (lp->lvb)
@@ -173,14 +391,9 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
173 make_strname(name, &lp->strname); 391 make_strname(name, &lp->strname);
174 lp->ls = ls; 392 lp->ls = ls;
175 lp->cur = DLM_LOCK_IV; 393 lp->cur = DLM_LOCK_IV;
176 lp->lvb = NULL;
177 lp->hold_null = NULL;
178 INIT_LIST_HEAD(&lp->clist);
179 INIT_LIST_HEAD(&lp->blist);
180 INIT_LIST_HEAD(&lp->delay_list); 394 INIT_LIST_HEAD(&lp->delay_list);
181 395
182 spin_lock(&ls->async_lock); 396 spin_lock(&ls->async_lock);
183 list_add(&lp->all_list, &ls->all_locks);
184 ls->all_locks_count++; 397 ls->all_locks_count++;
185 spin_unlock(&ls->async_lock); 398 spin_unlock(&ls->async_lock);
186 399
@@ -188,26 +401,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
188 return 0; 401 return 0;
189} 402}
190 403
191void gdlm_delete_lp(struct gdlm_lock *lp)
192{
193 struct gdlm_ls *ls = lp->ls;
194
195 spin_lock(&ls->async_lock);
196 if (!list_empty(&lp->clist))
197 list_del_init(&lp->clist);
198 if (!list_empty(&lp->blist))
199 list_del_init(&lp->blist);
200 if (!list_empty(&lp->delay_list))
201 list_del_init(&lp->delay_list);
202 gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type,
203 (unsigned long long)lp->lockname.ln_number);
204 list_del_init(&lp->all_list);
205 ls->all_locks_count--;
206 spin_unlock(&ls->async_lock);
207
208 kfree(lp);
209}
210
211int gdlm_get_lock(void *lockspace, struct lm_lockname *name, 404int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
212 void **lockp) 405 void **lockp)
213{ 406{
@@ -261,7 +454,7 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp)
261 454
262 if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { 455 if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
263 lp->lksb.sb_status = -EAGAIN; 456 lp->lksb.sb_status = -EAGAIN;
264 queue_complete(lp); 457 gdlm_ast(lp);
265 error = 0; 458 error = 0;
266 } 459 }
267 460
@@ -308,6 +501,12 @@ unsigned int gdlm_lock(void *lock, unsigned int cur_state,
308{ 501{
309 struct gdlm_lock *lp = lock; 502 struct gdlm_lock *lp = lock;
310 503
 504	if (req_state == LM_ST_UNLOCKED)
 505		return gdlm_unlock(lock, cur_state);
 506
311 clear_bit(LFL_DLM_CANCEL, &lp->flags); 510 clear_bit(LFL_DLM_CANCEL, &lp->flags);
312 if (flags & LM_FLAG_NOEXP) 511 if (flags & LM_FLAG_NOEXP)
313 set_bit(LFL_NOBLOCK, &lp->flags); 512 set_bit(LFL_NOBLOCK, &lp->flags);
@@ -351,7 +550,7 @@ void gdlm_cancel(void *lock)
351 if (delay_list) { 550 if (delay_list) {
352 set_bit(LFL_CANCEL, &lp->flags); 551 set_bit(LFL_CANCEL, &lp->flags);
353 set_bit(LFL_ACTIVE, &lp->flags); 552 set_bit(LFL_ACTIVE, &lp->flags);
354 queue_complete(lp); 553 gdlm_ast(lp);
355 return; 554 return;
356 } 555 }
357 556
@@ -507,22 +706,3 @@ void gdlm_submit_delayed(struct gdlm_ls *ls)
507 wake_up(&ls->thread_wait); 706 wake_up(&ls->thread_wait);
508} 707}
509 708
510int gdlm_release_all_locks(struct gdlm_ls *ls)
511{
512 struct gdlm_lock *lp, *safe;
513 int count = 0;
514
515 spin_lock(&ls->async_lock);
516 list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
517 list_del_init(&lp->all_list);
518
519 if (lp->lvb && lp->lvb != junk_lvb)
520 kfree(lp->lvb);
521 kfree(lp);
522 count++;
523 }
524 spin_unlock(&ls->async_lock);
525
526 return count;
527}
528
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index a243cf69c54e..3c98e7c6f93b 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -72,19 +72,12 @@ struct gdlm_ls {
72 int recover_jid_done; 72 int recover_jid_done;
73 int recover_jid_status; 73 int recover_jid_status;
74 spinlock_t async_lock; 74 spinlock_t async_lock;
75 struct list_head complete;
76 struct list_head blocking;
77 struct list_head delayed; 75 struct list_head delayed;
78 struct list_head submit; 76 struct list_head submit;
79 struct list_head all_locks;
80 u32 all_locks_count; 77 u32 all_locks_count;
81 wait_queue_head_t wait_control; 78 wait_queue_head_t wait_control;
82 struct task_struct *thread1; 79 struct task_struct *thread;
83 struct task_struct *thread2;
84 wait_queue_head_t thread_wait; 80 wait_queue_head_t thread_wait;
85 unsigned long drop_time;
86 int drop_locks_count;
87 int drop_locks_period;
88}; 81};
89 82
90enum { 83enum {
@@ -117,12 +110,7 @@ struct gdlm_lock {
117 u32 lkf; /* dlm flags DLM_LKF_ */ 110 u32 lkf; /* dlm flags DLM_LKF_ */
118 unsigned long flags; /* lock_dlm flags LFL_ */ 111 unsigned long flags; /* lock_dlm flags LFL_ */
119 112
120 int bast_mode; /* protected by async_lock */
121
122 struct list_head clist; /* complete */
123 struct list_head blist; /* blocking */
124 struct list_head delay_list; /* delayed */ 113 struct list_head delay_list; /* delayed */
125 struct list_head all_list; /* all locks for the fs */
126 struct gdlm_lock *hold_null; /* NL lock for hold_lvb */ 114 struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
127}; 115};
128 116
@@ -159,11 +147,7 @@ void gdlm_release_threads(struct gdlm_ls *);
159 147
160/* lock.c */ 148/* lock.c */
161 149
162s16 gdlm_make_lmstate(s16);
163void gdlm_queue_delayed(struct gdlm_lock *);
164void gdlm_submit_delayed(struct gdlm_ls *); 150void gdlm_submit_delayed(struct gdlm_ls *);
165int gdlm_release_all_locks(struct gdlm_ls *);
166void gdlm_delete_lp(struct gdlm_lock *);
167unsigned int gdlm_do_lock(struct gdlm_lock *); 151unsigned int gdlm_do_lock(struct gdlm_lock *);
168 152
169int gdlm_get_lock(void *, struct lm_lockname *, void **); 153int gdlm_get_lock(void *, struct lm_lockname *, void **);
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 470bdf650b50..09d78c216f48 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -22,22 +22,14 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
22 if (!ls) 22 if (!ls)
23 return NULL; 23 return NULL;
24 24
25 ls->drop_locks_count = GDLM_DROP_COUNT;
26 ls->drop_locks_period = GDLM_DROP_PERIOD;
27 ls->fscb = cb; 25 ls->fscb = cb;
28 ls->sdp = sdp; 26 ls->sdp = sdp;
29 ls->fsflags = flags; 27 ls->fsflags = flags;
30 spin_lock_init(&ls->async_lock); 28 spin_lock_init(&ls->async_lock);
31 INIT_LIST_HEAD(&ls->complete);
32 INIT_LIST_HEAD(&ls->blocking);
33 INIT_LIST_HEAD(&ls->delayed); 29 INIT_LIST_HEAD(&ls->delayed);
34 INIT_LIST_HEAD(&ls->submit); 30 INIT_LIST_HEAD(&ls->submit);
35 INIT_LIST_HEAD(&ls->all_locks);
36 init_waitqueue_head(&ls->thread_wait); 31 init_waitqueue_head(&ls->thread_wait);
37 init_waitqueue_head(&ls->wait_control); 32 init_waitqueue_head(&ls->wait_control);
38 ls->thread1 = NULL;
39 ls->thread2 = NULL;
40 ls->drop_time = jiffies;
41 ls->jid = -1; 33 ls->jid = -1;
42 34
43 strncpy(buf, table_name, 256); 35 strncpy(buf, table_name, 256);
@@ -180,7 +172,6 @@ out:
180static void gdlm_unmount(void *lockspace) 172static void gdlm_unmount(void *lockspace)
181{ 173{
182 struct gdlm_ls *ls = lockspace; 174 struct gdlm_ls *ls = lockspace;
183 int rv;
184 175
185 log_debug("unmount flags %lx", ls->flags); 176 log_debug("unmount flags %lx", ls->flags);
186 177
@@ -194,9 +185,7 @@ static void gdlm_unmount(void *lockspace)
194 gdlm_kobject_release(ls); 185 gdlm_kobject_release(ls);
195 dlm_release_lockspace(ls->dlm_lockspace, 2); 186 dlm_release_lockspace(ls->dlm_lockspace, 2);
196 gdlm_release_threads(ls); 187 gdlm_release_threads(ls);
197 rv = gdlm_release_all_locks(ls); 188 BUG_ON(ls->all_locks_count);
198 if (rv)
199 log_info("gdlm_unmount: %d stray locks freed", rv);
200out: 189out:
201 kfree(ls); 190 kfree(ls);
202} 191}
@@ -232,7 +221,6 @@ static void gdlm_withdraw(void *lockspace)
232 221
233 dlm_release_lockspace(ls->dlm_lockspace, 2); 222 dlm_release_lockspace(ls->dlm_lockspace, 2);
234 gdlm_release_threads(ls); 223 gdlm_release_threads(ls);
235 gdlm_release_all_locks(ls);
236 gdlm_kobject_release(ls); 224 gdlm_kobject_release(ls);
237} 225}
238 226
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index a4ff271df9ee..4ec571c3d8a9 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -114,17 +114,6 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
114 return sprintf(buf, "%d\n", ls->recover_jid_status); 114 return sprintf(buf, "%d\n", ls->recover_jid_status);
115} 115}
116 116
117static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf)
118{
119 return sprintf(buf, "%d\n", ls->drop_locks_count);
120}
121
122static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len)
123{
124 ls->drop_locks_count = simple_strtol(buf, NULL, 0);
125 return len;
126}
127
128struct gdlm_attr { 117struct gdlm_attr {
129 struct attribute attr; 118 struct attribute attr;
130 ssize_t (*show)(struct gdlm_ls *, char *); 119 ssize_t (*show)(struct gdlm_ls *, char *);
@@ -144,7 +133,6 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL);
144GDLM_ATTR(recover, 0644, recover_show, recover_store); 133GDLM_ATTR(recover, 0644, recover_show, recover_store);
145GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); 134GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
146GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); 135GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
147GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store);
148 136
149static struct attribute *gdlm_attrs[] = { 137static struct attribute *gdlm_attrs[] = {
150 &gdlm_attr_proto_name.attr, 138 &gdlm_attr_proto_name.attr,
@@ -157,7 +145,6 @@ static struct attribute *gdlm_attrs[] = {
157 &gdlm_attr_recover.attr, 145 &gdlm_attr_recover.attr,
158 &gdlm_attr_recover_done.attr, 146 &gdlm_attr_recover_done.attr,
159 &gdlm_attr_recover_status.attr, 147 &gdlm_attr_recover_status.attr,
160 &gdlm_attr_drop_count.attr,
161 NULL, 148 NULL,
162}; 149};
163 150
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index e53db6fd28ab..38823efd698c 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -9,367 +9,60 @@
9 9
10#include "lock_dlm.h" 10#include "lock_dlm.h"
11 11
12/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm 12static inline int no_work(struct gdlm_ls *ls)
13 thread gets to it. */
14
15static void queue_submit(struct gdlm_lock *lp)
16{
17 struct gdlm_ls *ls = lp->ls;
18
19 spin_lock(&ls->async_lock);
20 list_add_tail(&lp->delay_list, &ls->submit);
21 spin_unlock(&ls->async_lock);
22 wake_up(&ls->thread_wait);
23}
24
25static void process_blocking(struct gdlm_lock *lp, int bast_mode)
26{
27 struct gdlm_ls *ls = lp->ls;
28 unsigned int cb = 0;
29
30 switch (gdlm_make_lmstate(bast_mode)) {
31 case LM_ST_EXCLUSIVE:
32 cb = LM_CB_NEED_E;
33 break;
34 case LM_ST_DEFERRED:
35 cb = LM_CB_NEED_D;
36 break;
37 case LM_ST_SHARED:
38 cb = LM_CB_NEED_S;
39 break;
40 default:
41 gdlm_assert(0, "unknown bast mode %u", lp->bast_mode);
42 }
43
44 ls->fscb(ls->sdp, cb, &lp->lockname);
45}
46
47static void wake_up_ast(struct gdlm_lock *lp)
48{
49 clear_bit(LFL_AST_WAIT, &lp->flags);
50 smp_mb__after_clear_bit();
51 wake_up_bit(&lp->flags, LFL_AST_WAIT);
52}
53
54static void process_complete(struct gdlm_lock *lp)
55{
56 struct gdlm_ls *ls = lp->ls;
57 struct lm_async_cb acb;
58 s16 prev_mode = lp->cur;
59
60 memset(&acb, 0, sizeof(acb));
61
62 if (lp->lksb.sb_status == -DLM_ECANCEL) {
63 log_info("complete dlm cancel %x,%llx flags %lx",
64 lp->lockname.ln_type,
65 (unsigned long long)lp->lockname.ln_number,
66 lp->flags);
67
68 lp->req = lp->cur;
69 acb.lc_ret |= LM_OUT_CANCELED;
70 if (lp->cur == DLM_LOCK_IV)
71 lp->lksb.sb_lkid = 0;
72 goto out;
73 }
74
75 if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
76 if (lp->lksb.sb_status != -DLM_EUNLOCK) {
77 log_info("unlock sb_status %d %x,%llx flags %lx",
78 lp->lksb.sb_status, lp->lockname.ln_type,
79 (unsigned long long)lp->lockname.ln_number,
80 lp->flags);
81 return;
82 }
83
84 lp->cur = DLM_LOCK_IV;
85 lp->req = DLM_LOCK_IV;
86 lp->lksb.sb_lkid = 0;
87
88 if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
89 gdlm_delete_lp(lp);
90 return;
91 }
92 goto out;
93 }
94
95 if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
96 memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
97
98 if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
99 if (lp->req == DLM_LOCK_PR)
100 lp->req = DLM_LOCK_CW;
101 else if (lp->req == DLM_LOCK_CW)
102 lp->req = DLM_LOCK_PR;
103 }
104
105 /*
106 * A canceled lock request. The lock was just taken off the delayed
107 * list and was never even submitted to dlm.
108 */
109
110 if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
111 log_info("complete internal cancel %x,%llx",
112 lp->lockname.ln_type,
113 (unsigned long long)lp->lockname.ln_number);
114 lp->req = lp->cur;
115 acb.lc_ret |= LM_OUT_CANCELED;
116 goto out;
117 }
118
119 /*
 120	 * An error occurred.
121 */
122
123 if (lp->lksb.sb_status) {
124 /* a "normal" error */
125 if ((lp->lksb.sb_status == -EAGAIN) &&
126 (lp->lkf & DLM_LKF_NOQUEUE)) {
127 lp->req = lp->cur;
128 if (lp->cur == DLM_LOCK_IV)
129 lp->lksb.sb_lkid = 0;
130 goto out;
131 }
132
133 /* this could only happen with cancels I think */
134 log_info("ast sb_status %d %x,%llx flags %lx",
135 lp->lksb.sb_status, lp->lockname.ln_type,
136 (unsigned long long)lp->lockname.ln_number,
137 lp->flags);
138 if (lp->lksb.sb_status == -EDEADLOCK &&
139 lp->ls->fsflags & LM_MFLAG_CONV_NODROP) {
140 lp->req = lp->cur;
141 acb.lc_ret |= LM_OUT_CONV_DEADLK;
142 if (lp->cur == DLM_LOCK_IV)
143 lp->lksb.sb_lkid = 0;
144 goto out;
145 } else
146 return;
147 }
148
149 /*
150 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
151 */
152
153 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
154 wake_up_ast(lp);
155 return;
156 }
157
158 /*
159 * A lock has been demoted to NL because it initially completed during
160 * BLOCK_LOCKS. Now it must be requested in the originally requested
161 * mode.
162 */
163
164 if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
165 gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
166 lp->lockname.ln_type,
167 (unsigned long long)lp->lockname.ln_number);
168 gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
169 lp->lockname.ln_type,
170 (unsigned long long)lp->lockname.ln_number);
171
172 lp->cur = DLM_LOCK_NL;
173 lp->req = lp->prev_req;
174 lp->prev_req = DLM_LOCK_IV;
175 lp->lkf &= ~DLM_LKF_CONVDEADLK;
176
177 set_bit(LFL_NOCACHE, &lp->flags);
178
179 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
180 !test_bit(LFL_NOBLOCK, &lp->flags))
181 gdlm_queue_delayed(lp);
182 else
183 queue_submit(lp);
184 return;
185 }
186
187 /*
188 * A request is granted during dlm recovery. It may be granted
189 * because the locks of a failed node were cleared. In that case,
190 * there may be inconsistent data beneath this lock and we must wait
191 * for recovery to complete to use it. When gfs recovery is done this
192 * granted lock will be converted to NL and then reacquired in this
193 * granted state.
194 */
195
196 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
197 !test_bit(LFL_NOBLOCK, &lp->flags) &&
198 lp->req != DLM_LOCK_NL) {
199
200 lp->cur = lp->req;
201 lp->prev_req = lp->req;
202 lp->req = DLM_LOCK_NL;
203 lp->lkf |= DLM_LKF_CONVERT;
204 lp->lkf &= ~DLM_LKF_CONVDEADLK;
205
206 log_debug("rereq %x,%llx id %x %d,%d",
207 lp->lockname.ln_type,
208 (unsigned long long)lp->lockname.ln_number,
209 lp->lksb.sb_lkid, lp->cur, lp->req);
210
211 set_bit(LFL_REREQUEST, &lp->flags);
212 queue_submit(lp);
213 return;
214 }
215
216 /*
217 * DLM demoted the lock to NL before it was granted so GFS must be
218 * told it cannot cache data for this lock.
219 */
220
221 if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
222 set_bit(LFL_NOCACHE, &lp->flags);
223
224out:
225 /*
226 * This is an internal lock_dlm lock
227 */
228
229 if (test_bit(LFL_INLOCK, &lp->flags)) {
230 clear_bit(LFL_NOBLOCK, &lp->flags);
231 lp->cur = lp->req;
232 wake_up_ast(lp);
233 return;
234 }
235
236 /*
237 * Normal completion of a lock request. Tell GFS it now has the lock.
238 */
239
240 clear_bit(LFL_NOBLOCK, &lp->flags);
241 lp->cur = lp->req;
242
243 acb.lc_name = lp->lockname;
244 acb.lc_ret |= gdlm_make_lmstate(lp->cur);
245
246 if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
247 (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
248 acb.lc_ret |= LM_OUT_CACHEABLE;
249
250 ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
251}
252
253static inline int no_work(struct gdlm_ls *ls, int blocking)
254{ 13{
255 int ret; 14 int ret;
256 15
257 spin_lock(&ls->async_lock); 16 spin_lock(&ls->async_lock);
258 ret = list_empty(&ls->complete) && list_empty(&ls->submit); 17 ret = list_empty(&ls->submit);
259 if (ret && blocking)
260 ret = list_empty(&ls->blocking);
261 spin_unlock(&ls->async_lock); 18 spin_unlock(&ls->async_lock);
262 19
263 return ret; 20 return ret;
264} 21}
265 22
266static inline int check_drop(struct gdlm_ls *ls) 23static int gdlm_thread(void *data)
267{
268 if (!ls->drop_locks_count)
269 return 0;
270
271 if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) {
272 ls->drop_time = jiffies;
273 if (ls->all_locks_count >= ls->drop_locks_count)
274 return 1;
275 }
276 return 0;
277}
278
279static int gdlm_thread(void *data, int blist)
280{ 24{
281 struct gdlm_ls *ls = (struct gdlm_ls *) data; 25 struct gdlm_ls *ls = (struct gdlm_ls *) data;
282 struct gdlm_lock *lp = NULL; 26 struct gdlm_lock *lp = NULL;
283 uint8_t complete, blocking, submit, drop;
284
285 /* Only thread1 is allowed to do blocking callbacks since gfs
286 may wait for a completion callback within a blocking cb. */
287 27
288 while (!kthread_should_stop()) { 28 while (!kthread_should_stop()) {
289 wait_event_interruptible(ls->thread_wait, 29 wait_event_interruptible(ls->thread_wait,
290 !no_work(ls, blist) || kthread_should_stop()); 30 !no_work(ls) || kthread_should_stop());
291
292 complete = blocking = submit = drop = 0;
293 31
294 spin_lock(&ls->async_lock); 32 spin_lock(&ls->async_lock);
295 33
296 if (blist && !list_empty(&ls->blocking)) { 34 if (!list_empty(&ls->submit)) {
297 lp = list_entry(ls->blocking.next, struct gdlm_lock,
298 blist);
299 list_del_init(&lp->blist);
300 blocking = lp->bast_mode;
301 lp->bast_mode = 0;
302 } else if (!list_empty(&ls->complete)) {
303 lp = list_entry(ls->complete.next, struct gdlm_lock,
304 clist);
305 list_del_init(&lp->clist);
306 complete = 1;
307 } else if (!list_empty(&ls->submit)) {
308 lp = list_entry(ls->submit.next, struct gdlm_lock, 35 lp = list_entry(ls->submit.next, struct gdlm_lock,
309 delay_list); 36 delay_list);
310 list_del_init(&lp->delay_list); 37 list_del_init(&lp->delay_list);
311 submit = 1; 38 spin_unlock(&ls->async_lock);
39 gdlm_do_lock(lp);
40 spin_lock(&ls->async_lock);
312 } 41 }
313
314 drop = check_drop(ls);
315 spin_unlock(&ls->async_lock); 42 spin_unlock(&ls->async_lock);
316
317 if (complete)
318 process_complete(lp);
319
320 else if (blocking)
321 process_blocking(lp, blocking);
322
323 else if (submit)
324 gdlm_do_lock(lp);
325
326 if (drop)
327 ls->fscb(ls->sdp, LM_CB_DROPLOCKS, NULL);
328
329 schedule();
330 } 43 }
331 44
332 return 0; 45 return 0;
333} 46}
334 47
335static int gdlm_thread1(void *data)
336{
337 return gdlm_thread(data, 1);
338}
339
340static int gdlm_thread2(void *data)
341{
342 return gdlm_thread(data, 0);
343}
344
345int gdlm_init_threads(struct gdlm_ls *ls) 48int gdlm_init_threads(struct gdlm_ls *ls)
346{ 49{
347 struct task_struct *p; 50 struct task_struct *p;
348 int error; 51 int error;
349 52
350 p = kthread_run(gdlm_thread1, ls, "lock_dlm1"); 53 p = kthread_run(gdlm_thread, ls, "lock_dlm");
351 error = IS_ERR(p);
352 if (error) {
353 log_error("can't start lock_dlm1 thread %d", error);
354 return error;
355 }
356 ls->thread1 = p;
357
358 p = kthread_run(gdlm_thread2, ls, "lock_dlm2");
359 error = IS_ERR(p); 54 error = IS_ERR(p);
360 if (error) { 55 if (error) {
361 log_error("can't start lock_dlm2 thread %d", error); 56 log_error("can't start lock_dlm thread %d", error);
362 kthread_stop(ls->thread1);
363 return error; 57 return error;
364 } 58 }
365 ls->thread2 = p; 59 ls->thread = p;
366 60
367 return 0; 61 return 0;
368} 62}
369 63
370void gdlm_release_threads(struct gdlm_ls *ls) 64void gdlm_release_threads(struct gdlm_ls *ls)
371{ 65{
372 kthread_stop(ls->thread1); 66 kthread_stop(ls->thread);
373 kthread_stop(ls->thread2);
374} 67}
375 68
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile
deleted file mode 100644
index 35e9730bc3a8..000000000000
--- a/fs/gfs2/locking/nolock/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
1obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += lock_nolock.o
2lock_nolock-y := main.o
3
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
deleted file mode 100644
index 284a5ece8d94..000000000000
--- a/fs/gfs2/locking/nolock/main.c
+++ /dev/null
@@ -1,238 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/init.h>
13#include <linux/types.h>
14#include <linux/fs.h>
15#include <linux/lm_interface.h>
16
17struct nolock_lockspace {
18 unsigned int nl_lvb_size;
19};
20
21static const struct lm_lockops nolock_ops;
22
23static int nolock_mount(char *table_name, char *host_data,
24 lm_callback_t cb, void *cb_data,
25 unsigned int min_lvb_size, int flags,
26 struct lm_lockstruct *lockstruct,
27 struct kobject *fskobj)
28{
29 char *c;
30 unsigned int jid;
31 struct nolock_lockspace *nl;
32
33 c = strstr(host_data, "jid=");
34 if (!c)
35 jid = 0;
36 else {
37 c += 4;
38 sscanf(c, "%u", &jid);
39 }
40
41 nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
42 if (!nl)
43 return -ENOMEM;
44
45 nl->nl_lvb_size = min_lvb_size;
46
47 lockstruct->ls_jid = jid;
48 lockstruct->ls_first = 1;
49 lockstruct->ls_lvb_size = min_lvb_size;
50 lockstruct->ls_lockspace = nl;
51 lockstruct->ls_ops = &nolock_ops;
52 lockstruct->ls_flags = LM_LSFLAG_LOCAL;
53
54 return 0;
55}
56
57static void nolock_others_may_mount(void *lockspace)
58{
59}
60
61static void nolock_unmount(void *lockspace)
62{
63 struct nolock_lockspace *nl = lockspace;
64 kfree(nl);
65}
66
67static void nolock_withdraw(void *lockspace)
68{
69}
70
71/**
72 * nolock_get_lock - get a lm_lock_t given a descripton of the lock
73 * @lockspace: the lockspace the lock lives in
74 * @name: the name of the lock
75 * @lockp: return the lm_lock_t here
76 *
77 * Returns: 0 on success, -EXXX on failure
78 */
79
80static int nolock_get_lock(void *lockspace, struct lm_lockname *name,
81 void **lockp)
82{
83 *lockp = lockspace;
84 return 0;
85}
86
87/**
88 * nolock_put_lock - get rid of a lock structure
89 * @lock: the lock to throw away
90 *
91 */
92
93static void nolock_put_lock(void *lock)
94{
95}
96
97/**
98 * nolock_lock - acquire a lock
99 * @lock: the lock to manipulate
100 * @cur_state: the current state
101 * @req_state: the requested state
102 * @flags: modifier flags
103 *
104 * Returns: A bitmap of LM_OUT_*
105 */
106
107static unsigned int nolock_lock(void *lock, unsigned int cur_state,
108 unsigned int req_state, unsigned int flags)
109{
110 return req_state | LM_OUT_CACHEABLE;
111}
112
113/**
114 * nolock_unlock - unlock a lock
115 * @lock: the lock to manipulate
116 * @cur_state: the current state
117 *
118 * Returns: 0
119 */
120
121static unsigned int nolock_unlock(void *lock, unsigned int cur_state)
122{
123 return 0;
124}
125
126static void nolock_cancel(void *lock)
127{
128}
129
130/**
131 * nolock_hold_lvb - hold on to a lock value block
132 * @lock: the lock the LVB is associated with
133 * @lvbp: return the lm_lvb_t here
134 *
135 * Returns: 0 on success, -EXXX on failure
136 */
137
138static int nolock_hold_lvb(void *lock, char **lvbp)
139{
140 struct nolock_lockspace *nl = lock;
141 int error = 0;
142
143 *lvbp = kzalloc(nl->nl_lvb_size, GFP_NOFS);
144 if (!*lvbp)
145 error = -ENOMEM;
146
147 return error;
148}
149
150/**
151 * nolock_unhold_lvb - release a LVB
152 * @lock: the lock the LVB is associated with
153 * @lvb: the lock value block
154 *
155 */
156
157static void nolock_unhold_lvb(void *lock, char *lvb)
158{
159 kfree(lvb);
160}
161
162static int nolock_plock_get(void *lockspace, struct lm_lockname *name,
163 struct file *file, struct file_lock *fl)
164{
165 posix_test_lock(file, fl);
166
167 return 0;
168}
169
170static int nolock_plock(void *lockspace, struct lm_lockname *name,
171 struct file *file, int cmd, struct file_lock *fl)
172{
173 int error;
174 error = posix_lock_file_wait(file, fl);
175 return error;
176}
177
178static int nolock_punlock(void *lockspace, struct lm_lockname *name,
179 struct file *file, struct file_lock *fl)
180{
181 int error;
182 error = posix_lock_file_wait(file, fl);
183 return error;
184}
185
186static void nolock_recovery_done(void *lockspace, unsigned int jid,
187 unsigned int message)
188{
189}
190
191static const struct lm_lockops nolock_ops = {
192 .lm_proto_name = "lock_nolock",
193 .lm_mount = nolock_mount,
194 .lm_others_may_mount = nolock_others_may_mount,
195 .lm_unmount = nolock_unmount,
196 .lm_withdraw = nolock_withdraw,
197 .lm_get_lock = nolock_get_lock,
198 .lm_put_lock = nolock_put_lock,
199 .lm_lock = nolock_lock,
200 .lm_unlock = nolock_unlock,
201 .lm_cancel = nolock_cancel,
202 .lm_hold_lvb = nolock_hold_lvb,
203 .lm_unhold_lvb = nolock_unhold_lvb,
204 .lm_plock_get = nolock_plock_get,
205 .lm_plock = nolock_plock,
206 .lm_punlock = nolock_punlock,
207 .lm_recovery_done = nolock_recovery_done,
208 .lm_owner = THIS_MODULE,
209};
210
211static int __init init_nolock(void)
212{
213 int error;
214
215 error = gfs2_register_lockproto(&nolock_ops);
216 if (error) {
217 printk(KERN_WARNING
218 "lock_nolock: can't register protocol: %d\n", error);
219 return error;
220 }
221
222 printk(KERN_INFO
223 "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
224 return 0;
225}
226
227static void __exit exit_nolock(void)
228{
229 gfs2_unregister_lockproto(&nolock_ops);
230}
231
232module_init(init_nolock);
233module_exit(exit_nolock);
234
235MODULE_DESCRIPTION("GFS Nolock Locking Module");
236MODULE_AUTHOR("Red Hat, Inc.");
237MODULE_LICENSE("GPL");
238
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 548264b1836d..6c6af9f5e3ab 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -87,6 +87,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
87 */ 87 */
88 88
89static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 89static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
90__releases(&sdp->sd_log_lock)
91__acquires(&sdp->sd_log_lock)
90{ 92{
91 struct gfs2_bufdata *bd, *s; 93 struct gfs2_bufdata *bd, *s;
92 struct buffer_head *bh; 94 struct buffer_head *bh;
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 771152816508..7c64510ccfd2 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -21,6 +21,7 @@
21 */ 21 */
22 22
23static inline void gfs2_log_lock(struct gfs2_sbd *sdp) 23static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
24__acquires(&sdp->sd_log_lock)
24{ 25{
25 spin_lock(&sdp->sd_log_lock); 26 spin_lock(&sdp->sd_log_lock);
26} 27}
@@ -32,6 +33,7 @@ static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
32 */ 33 */
33 34
34static inline void gfs2_log_unlock(struct gfs2_sbd *sdp) 35static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
36__releases(&sdp->sd_log_lock)
35{ 37{
36 spin_unlock(&sdp->sd_log_lock); 38 spin_unlock(&sdp->sd_log_lock);
37} 39}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 053e2ebbbd50..bcc668d0fadd 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -40,8 +40,6 @@ static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo)
40 INIT_HLIST_NODE(&gl->gl_list); 40 INIT_HLIST_NODE(&gl->gl_list);
41 spin_lock_init(&gl->gl_spin); 41 spin_lock_init(&gl->gl_spin);
42 INIT_LIST_HEAD(&gl->gl_holders); 42 INIT_LIST_HEAD(&gl->gl_holders);
43 INIT_LIST_HEAD(&gl->gl_waiters1);
44 INIT_LIST_HEAD(&gl->gl_waiters3);
45 gl->gl_lvb = NULL; 43 gl->gl_lvb = NULL;
46 atomic_set(&gl->gl_lvb_count, 0); 44 atomic_set(&gl->gl_lvb_count, 0);
47 INIT_LIST_HEAD(&gl->gl_reclaim); 45 INIT_LIST_HEAD(&gl->gl_reclaim);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 78d75f892f82..09853620c951 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -129,7 +129,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl)
129} 129}
130 130
131/** 131/**
132 * getbuf - Get a buffer with a given address space 132 * gfs2_getbuf - Get a buffer with a given address space
133 * @gl: the glock 133 * @gl: the glock
134 * @blkno: the block number (filesystem scope) 134 * @blkno: the block number (filesystem scope)
135 * @create: 1 if the buffer should be created 135 * @create: 1 if the buffer should be created
@@ -137,7 +137,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl)
137 * Returns: the buffer 137 * Returns: the buffer
138 */ 138 */
139 139
140static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create) 140struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
141{ 141{
142 struct address_space *mapping = gl->gl_aspace->i_mapping; 142 struct address_space *mapping = gl->gl_aspace->i_mapping;
143 struct gfs2_sbd *sdp = gl->gl_sbd; 143 struct gfs2_sbd *sdp = gl->gl_sbd;
@@ -205,7 +205,7 @@ static void meta_prep_new(struct buffer_head *bh)
205struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) 205struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
206{ 206{
207 struct buffer_head *bh; 207 struct buffer_head *bh;
208 bh = getbuf(gl, blkno, CREATE); 208 bh = gfs2_getbuf(gl, blkno, CREATE);
209 meta_prep_new(bh); 209 meta_prep_new(bh);
210 return bh; 210 return bh;
211} 211}
@@ -223,7 +223,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
223int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, 223int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
224 struct buffer_head **bhp) 224 struct buffer_head **bhp)
225{ 225{
226 *bhp = getbuf(gl, blkno, CREATE); 226 *bhp = gfs2_getbuf(gl, blkno, CREATE);
227 if (!buffer_uptodate(*bhp)) { 227 if (!buffer_uptodate(*bhp)) {
228 ll_rw_block(READ_META, 1, bhp); 228 ll_rw_block(READ_META, 1, bhp);
229 if (flags & DIO_WAIT) { 229 if (flags & DIO_WAIT) {
@@ -346,7 +346,7 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
346 struct buffer_head *bh; 346 struct buffer_head *bh;
347 347
348 while (blen) { 348 while (blen) {
349 bh = getbuf(ip->i_gl, bstart, NO_CREATE); 349 bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE);
350 if (bh) { 350 if (bh) {
351 lock_buffer(bh); 351 lock_buffer(bh);
352 gfs2_log_lock(sdp); 352 gfs2_log_lock(sdp);
@@ -421,7 +421,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
421 if (extlen > max_ra) 421 if (extlen > max_ra)
422 extlen = max_ra; 422 extlen = max_ra;
423 423
424 first_bh = getbuf(gl, dblock, CREATE); 424 first_bh = gfs2_getbuf(gl, dblock, CREATE);
425 425
426 if (buffer_uptodate(first_bh)) 426 if (buffer_uptodate(first_bh))
427 goto out; 427 goto out;
@@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
432 extlen--; 432 extlen--;
433 433
434 while (extlen) { 434 while (extlen) {
435 bh = getbuf(gl, dblock, CREATE); 435 bh = gfs2_getbuf(gl, dblock, CREATE);
436 436
437 if (!buffer_uptodate(bh) && !buffer_locked(bh)) 437 if (!buffer_uptodate(bh) && !buffer_locked(bh))
438 ll_rw_block(READA, 1, &bh); 438 ll_rw_block(READA, 1, &bh);
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 73e3b1c76fe1..b1a5f3674d43 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -47,6 +47,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
47int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, 47int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
48 int flags, struct buffer_head **bhp); 48 int flags, struct buffer_head **bhp);
49int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); 49int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
50struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
50 51
51void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, 52void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
52 int meta); 53 int meta);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index f55394e57cb2..e64a1b04117a 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -499,34 +499,34 @@ static int __gfs2_readpage(void *file, struct page *page)
499 * @file: The file to read 499 * @file: The file to read
500 * @page: The page of the file 500 * @page: The page of the file
501 * 501 *
502 * This deals with the locking required. We use a trylock in order to 502 * This deals with the locking required. We have to unlock and
503 * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE 503 * relock the page in order to get the locking in the right
504 * in the event that we are unable to get the lock. 504 * order.
505 */ 505 */
506 506
507static int gfs2_readpage(struct file *file, struct page *page) 507static int gfs2_readpage(struct file *file, struct page *page)
508{ 508{
509 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 509 struct address_space *mapping = page->mapping;
510 struct gfs2_holder *gh; 510 struct gfs2_inode *ip = GFS2_I(mapping->host);
511 struct gfs2_holder gh;
511 int error; 512 int error;
512 513
513 gh = gfs2_glock_is_locked_by_me(ip->i_gl); 514 unlock_page(page);
514 if (!gh) { 515 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
515 gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS); 516 error = gfs2_glock_nq_atime(&gh);
516 if (!gh) 517 if (unlikely(error))
517 return -ENOBUFS; 518 goto out;
518 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh); 519 error = AOP_TRUNCATED_PAGE;
520 lock_page(page);
521 if (page->mapping == mapping && !PageUptodate(page))
522 error = __gfs2_readpage(file, page);
523 else
519 unlock_page(page); 524 unlock_page(page);
520 error = gfs2_glock_nq_atime(gh); 525 gfs2_glock_dq(&gh);
521 if (likely(error != 0))
522 goto out;
523 return AOP_TRUNCATED_PAGE;
524 }
525 error = __gfs2_readpage(file, page);
526 gfs2_glock_dq(gh);
527out: 526out:
528 gfs2_holder_uninit(gh); 527 gfs2_holder_uninit(&gh);
529 kfree(gh); 528 if (error && error != AOP_TRUNCATED_PAGE)
529 lock_page(page);
530 return error; 530 return error;
531} 531}
532 532
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 24dd59450088..e9a366d4411c 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -15,6 +15,7 @@
15#include <linux/uio.h> 15#include <linux/uio.h>
16#include <linux/blkdev.h> 16#include <linux/blkdev.h>
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/mount.h>
18#include <linux/fs.h> 19#include <linux/fs.h>
19#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
20#include <linux/ext2_fs.h> 21#include <linux/ext2_fs.h>
@@ -133,7 +134,6 @@ static const u32 fsflags_to_gfs2[32] = {
133 [7] = GFS2_DIF_NOATIME, 134 [7] = GFS2_DIF_NOATIME,
134 [12] = GFS2_DIF_EXHASH, 135 [12] = GFS2_DIF_EXHASH,
135 [14] = GFS2_DIF_INHERIT_JDATA, 136 [14] = GFS2_DIF_INHERIT_JDATA,
136 [20] = GFS2_DIF_INHERIT_DIRECTIO,
137}; 137};
138 138
139static const u32 gfs2_to_fsflags[32] = { 139static const u32 gfs2_to_fsflags[32] = {
@@ -142,7 +142,6 @@ static const u32 gfs2_to_fsflags[32] = {
142 [gfs2fl_AppendOnly] = FS_APPEND_FL, 142 [gfs2fl_AppendOnly] = FS_APPEND_FL,
143 [gfs2fl_NoAtime] = FS_NOATIME_FL, 143 [gfs2fl_NoAtime] = FS_NOATIME_FL,
144 [gfs2fl_ExHash] = FS_INDEX_FL, 144 [gfs2fl_ExHash] = FS_INDEX_FL,
145 [gfs2fl_InheritDirectio] = FS_DIRECTIO_FL,
146 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, 145 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
147}; 146};
148 147
@@ -160,12 +159,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
160 return error; 159 return error;
161 160
162 fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags); 161 fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags);
163 if (!S_ISDIR(inode->i_mode)) { 162 if (!S_ISDIR(inode->i_mode) && ip->i_di.di_flags & GFS2_DIF_JDATA)
164 if (ip->i_di.di_flags & GFS2_DIF_JDATA) 163 fsflags |= FS_JOURNAL_DATA_FL;
165 fsflags |= FS_JOURNAL_DATA_FL;
166 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
167 fsflags |= FS_DIRECTIO_FL;
168 }
169 if (put_user(fsflags, ptr)) 164 if (put_user(fsflags, ptr))
170 error = -EFAULT; 165 error = -EFAULT;
171 166
@@ -194,13 +189,11 @@ void gfs2_set_inode_flags(struct inode *inode)
194 189
195/* Flags that can be set by user space */ 190/* Flags that can be set by user space */
196#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ 191#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \
197 GFS2_DIF_DIRECTIO| \
198 GFS2_DIF_IMMUTABLE| \ 192 GFS2_DIF_IMMUTABLE| \
199 GFS2_DIF_APPENDONLY| \ 193 GFS2_DIF_APPENDONLY| \
200 GFS2_DIF_NOATIME| \ 194 GFS2_DIF_NOATIME| \
201 GFS2_DIF_SYNC| \ 195 GFS2_DIF_SYNC| \
202 GFS2_DIF_SYSTEM| \ 196 GFS2_DIF_SYSTEM| \
203 GFS2_DIF_INHERIT_DIRECTIO| \
204 GFS2_DIF_INHERIT_JDATA) 197 GFS2_DIF_INHERIT_JDATA)
205 198
206/** 199/**
@@ -220,10 +213,14 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
220 int error; 213 int error;
221 u32 new_flags, flags; 214 u32 new_flags, flags;
222 215
223 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 216 error = mnt_want_write(filp->f_path.mnt);
224 if (error) 217 if (error)
225 return error; 218 return error;
226 219
220 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
221 if (error)
222 goto out_drop_write;
223
227 flags = ip->i_di.di_flags; 224 flags = ip->i_di.di_flags;
228 new_flags = (flags & ~mask) | (reqflags & mask); 225 new_flags = (flags & ~mask) | (reqflags & mask);
229 if ((new_flags ^ flags) == 0) 226 if ((new_flags ^ flags) == 0)
@@ -242,7 +239,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
242 !capable(CAP_LINUX_IMMUTABLE)) 239 !capable(CAP_LINUX_IMMUTABLE))
243 goto out; 240 goto out;
244 if (!IS_IMMUTABLE(inode)) { 241 if (!IS_IMMUTABLE(inode)) {
245 error = permission(inode, MAY_WRITE, NULL); 242 error = gfs2_permission(inode, MAY_WRITE);
246 if (error) 243 if (error)
247 goto out; 244 goto out;
248 } 245 }
@@ -272,6 +269,8 @@ out_trans_end:
272 gfs2_trans_end(sdp); 269 gfs2_trans_end(sdp);
273out: 270out:
274 gfs2_glock_dq_uninit(&gh); 271 gfs2_glock_dq_uninit(&gh);
272out_drop_write:
273 mnt_drop_write(filp->f_path.mnt);
275 return error; 274 return error;
276} 275}
277 276
@@ -285,8 +284,6 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
285 if (!S_ISDIR(inode->i_mode)) { 284 if (!S_ISDIR(inode->i_mode)) {
286 if (gfsflags & GFS2_DIF_INHERIT_JDATA) 285 if (gfsflags & GFS2_DIF_INHERIT_JDATA)
287 gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); 286 gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
288 if (gfsflags & GFS2_DIF_INHERIT_DIRECTIO)
289 gfsflags ^= (GFS2_DIF_DIRECTIO | GFS2_DIF_INHERIT_DIRECTIO);
290 return do_gfs2_set_flags(filp, gfsflags, ~0); 287 return do_gfs2_set_flags(filp, gfsflags, ~0);
291 } 288 }
292 return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA); 289 return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
@@ -487,11 +484,6 @@ static int gfs2_open(struct inode *inode, struct file *file)
487 goto fail_gunlock; 484 goto fail_gunlock;
488 } 485 }
489 486
490 /* Listen to the Direct I/O flag */
491
492 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
493 file->f_flags |= O_DIRECT;
494
495 gfs2_glock_dq_uninit(&i_gh); 487 gfs2_glock_dq_uninit(&i_gh);
496 } 488 }
497 489
@@ -669,8 +661,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
669 int error = 0; 661 int error = 0;
670 662
671 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; 663 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
672 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE 664 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
673 | GL_FLOCK;
674 665
675 mutex_lock(&fp->f_fl_mutex); 666 mutex_lock(&fp->f_fl_mutex);
676 667
@@ -683,9 +674,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
683 gfs2_glock_dq_wait(fl_gh); 674 gfs2_glock_dq_wait(fl_gh);
684 gfs2_holder_reinit(state, flags, fl_gh); 675 gfs2_holder_reinit(state, flags, fl_gh);
685 } else { 676 } else {
686 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), 677 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
687 ip->i_no_addr, &gfs2_flock_glops, 678 &gfs2_flock_glops, CREATE, &gl);
688 CREATE, &gl);
689 if (error) 679 if (error)
690 goto out; 680 goto out;
691 gfs2_holder_init(gl, state, flags, fl_gh); 681 gfs2_holder_init(gl, state, flags, fl_gh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b2028c82e8d1..b4d1d6490633 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -64,7 +64,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
64 mutex_init(&sdp->sd_rindex_mutex); 64 mutex_init(&sdp->sd_rindex_mutex);
65 INIT_LIST_HEAD(&sdp->sd_rindex_list); 65 INIT_LIST_HEAD(&sdp->sd_rindex_list);
66 INIT_LIST_HEAD(&sdp->sd_rindex_mru_list); 66 INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
67 INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
68 67
69 INIT_LIST_HEAD(&sdp->sd_jindex_list); 68 INIT_LIST_HEAD(&sdp->sd_jindex_list);
70 spin_lock_init(&sdp->sd_jindex_spin); 69 spin_lock_init(&sdp->sd_jindex_spin);
@@ -364,6 +363,8 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
364 363
365static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) 364static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
366{ 365{
366 if (!sdp->sd_lockstruct.ls_ops->lm_others_may_mount)
367 return;
367 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 368 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
368 sdp->sd_lockstruct.ls_ops->lm_others_may_mount( 369 sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
369 sdp->sd_lockstruct.ls_lockspace); 370 sdp->sd_lockstruct.ls_lockspace);
@@ -741,8 +742,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
741 goto out; 742 goto out;
742 } 743 }
743 744
744 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) || 745 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
745 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
746 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= 746 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
747 GFS2_MIN_LVB_SIZE)) { 747 GFS2_MIN_LVB_SIZE)) {
748 gfs2_unmount_lockproto(&sdp->sd_lockstruct); 748 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
@@ -873,7 +873,7 @@ fail_sb:
873fail_locking: 873fail_locking:
874 init_locking(sdp, &mount_gh, UNDO); 874 init_locking(sdp, &mount_gh, UNDO);
875fail_lm: 875fail_lm:
876 gfs2_gl_hash_clear(sdp, WAIT); 876 gfs2_gl_hash_clear(sdp);
877 gfs2_lm_unmount(sdp); 877 gfs2_lm_unmount(sdp);
878 while (invalidate_inodes(sb)) 878 while (invalidate_inodes(sb))
879 yield(); 879 yield();
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 2686ad4c0029..1e252dfc5294 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -163,7 +163,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
163 if (error) 163 if (error)
164 goto out; 164 goto out;
165 165
166 error = permission(dir, MAY_WRITE | MAY_EXEC, NULL); 166 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
167 if (error) 167 if (error)
168 goto out_gunlock; 168 goto out_gunlock;
169 169
@@ -669,7 +669,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
669 } 669 }
670 } 670 }
671 } else { 671 } else {
672 error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL); 672 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
673 if (error) 673 if (error)
674 goto out_gunlock; 674 goto out_gunlock;
675 675
@@ -704,7 +704,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
704 /* Check out the dir to be renamed */ 704 /* Check out the dir to be renamed */
705 705
706 if (dir_rename) { 706 if (dir_rename) {
707 error = permission(odentry->d_inode, MAY_WRITE, NULL); 707 error = gfs2_permission(odentry->d_inode, MAY_WRITE);
708 if (error) 708 if (error)
709 goto out_gunlock; 709 goto out_gunlock;
710 } 710 }
@@ -891,7 +891,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
891 * Returns: errno 891 * Returns: errno
892 */ 892 */
893 893
894static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) 894int gfs2_permission(struct inode *inode, int mask)
895{ 895{
896 struct gfs2_inode *ip = GFS2_I(inode); 896 struct gfs2_inode *ip = GFS2_I(inode);
897 struct gfs2_holder i_gh; 897 struct gfs2_holder i_gh;
@@ -905,13 +905,22 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
905 unlock = 1; 905 unlock = 1;
906 } 906 }
907 907
908 error = generic_permission(inode, mask, gfs2_check_acl); 908 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
909 error = -EACCES;
910 else
911 error = generic_permission(inode, mask, gfs2_check_acl);
909 if (unlock) 912 if (unlock)
910 gfs2_glock_dq_uninit(&i_gh); 913 gfs2_glock_dq_uninit(&i_gh);
911 914
912 return error; 915 return error;
913} 916}
914 917
918static int gfs2_iop_permission(struct inode *inode, int mask,
919 struct nameidata *nd)
920{
921 return gfs2_permission(inode, mask);
922}
923
915static int setattr_size(struct inode *inode, struct iattr *attr) 924static int setattr_size(struct inode *inode, struct iattr *attr)
916{ 925{
917 struct gfs2_inode *ip = GFS2_I(inode); 926 struct gfs2_inode *ip = GFS2_I(inode);
@@ -1141,7 +1150,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
1141} 1150}
1142 1151
1143const struct inode_operations gfs2_file_iops = { 1152const struct inode_operations gfs2_file_iops = {
1144 .permission = gfs2_permission, 1153 .permission = gfs2_iop_permission,
1145 .setattr = gfs2_setattr, 1154 .setattr = gfs2_setattr,
1146 .getattr = gfs2_getattr, 1155 .getattr = gfs2_getattr,
1147 .setxattr = gfs2_setxattr, 1156 .setxattr = gfs2_setxattr,
@@ -1160,7 +1169,7 @@ const struct inode_operations gfs2_dir_iops = {
1160 .rmdir = gfs2_rmdir, 1169 .rmdir = gfs2_rmdir,
1161 .mknod = gfs2_mknod, 1170 .mknod = gfs2_mknod,
1162 .rename = gfs2_rename, 1171 .rename = gfs2_rename,
1163 .permission = gfs2_permission, 1172 .permission = gfs2_iop_permission,
1164 .setattr = gfs2_setattr, 1173 .setattr = gfs2_setattr,
1165 .getattr = gfs2_getattr, 1174 .getattr = gfs2_getattr,
1166 .setxattr = gfs2_setxattr, 1175 .setxattr = gfs2_setxattr,
@@ -1172,7 +1181,7 @@ const struct inode_operations gfs2_dir_iops = {
1172const struct inode_operations gfs2_symlink_iops = { 1181const struct inode_operations gfs2_symlink_iops = {
1173 .readlink = gfs2_readlink, 1182 .readlink = gfs2_readlink,
1174 .follow_link = gfs2_follow_link, 1183 .follow_link = gfs2_follow_link,
1175 .permission = gfs2_permission, 1184 .permission = gfs2_iop_permission,
1176 .setattr = gfs2_setattr, 1185 .setattr = gfs2_setattr,
1177 .getattr = gfs2_getattr, 1186 .getattr = gfs2_getattr,
1178 .setxattr = gfs2_setxattr, 1187 .setxattr = gfs2_setxattr,
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 0b7cc920eb89..f66ea0f7a356 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -126,7 +126,7 @@ static void gfs2_put_super(struct super_block *sb)
126 gfs2_clear_rgrpd(sdp); 126 gfs2_clear_rgrpd(sdp);
127 gfs2_jindex_free(sdp); 127 gfs2_jindex_free(sdp);
128 /* Take apart glock structures and buffer lists */ 128 /* Take apart glock structures and buffer lists */
129 gfs2_gl_hash_clear(sdp, WAIT); 129 gfs2_gl_hash_clear(sdp);
130 /* Unmount the locking protocol */ 130 /* Unmount the locking protocol */
131 gfs2_lm_unmount(sdp); 131 gfs2_lm_unmount(sdp);
132 132
@@ -155,7 +155,7 @@ static void gfs2_write_super(struct super_block *sb)
155static int gfs2_sync_fs(struct super_block *sb, int wait) 155static int gfs2_sync_fs(struct super_block *sb, int wait)
156{ 156{
157 sb->s_dirt = 0; 157 sb->s_dirt = 0;
158 if (wait) 158 if (wait && sb->s_fs_info)
159 gfs2_log_flush(sb->s_fs_info, NULL); 159 gfs2_log_flush(sb->s_fs_info, NULL);
160 return 0; 160 return 0;
161} 161}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 56aaf915c59a..3e073f5144fa 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -904,7 +904,7 @@ static int need_sync(struct gfs2_quota_data *qd)
904 do_sync = 0; 904 do_sync = 0;
905 else { 905 else {
906 value *= gfs2_jindex_size(sdp) * num; 906 value *= gfs2_jindex_size(sdp) * num;
907 do_div(value, den); 907 value = div_s64(value, den);
908 value += (s64)be64_to_cpu(qd->qd_qb.qb_value); 908 value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
909 if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) 909 if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
910 do_sync = 0; 910 do_sync = 0;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 2888e4b4b1c5..d5e91f4f6a0b 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -428,6 +428,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
428static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, 428static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
429 unsigned int message) 429 unsigned int message)
430{ 430{
431 if (!sdp->sd_lockstruct.ls_ops->lm_recovery_done)
432 return;
433
431 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 434 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
432 sdp->sd_lockstruct.ls_ops->lm_recovery_done( 435 sdp->sd_lockstruct.ls_ops->lm_recovery_done(
433 sdp->sd_lockstruct.ls_lockspace, jid, message); 436 sdp->sd_lockstruct.ls_lockspace, jid, message);
@@ -505,7 +508,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
505 508
506 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 509 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
507 LM_FLAG_NOEXP | LM_FLAG_PRIORITY | 510 LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
508 GL_NOCANCEL | GL_NOCACHE, &t_gh); 511 GL_NOCACHE, &t_gh);
509 if (error) 512 if (error)
510 goto fail_gunlock_ji; 513 goto fail_gunlock_ji;
511 514
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3401628d742b..2d90fb253505 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -371,11 +371,6 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
371 371
372 spin_lock(&sdp->sd_rindex_spin); 372 spin_lock(&sdp->sd_rindex_spin);
373 sdp->sd_rindex_forward = NULL; 373 sdp->sd_rindex_forward = NULL;
374 head = &sdp->sd_rindex_recent_list;
375 while (!list_empty(head)) {
376 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
377 list_del(&rgd->rd_recent);
378 }
379 spin_unlock(&sdp->sd_rindex_spin); 374 spin_unlock(&sdp->sd_rindex_spin);
380 375
381 head = &sdp->sd_rindex_list; 376 head = &sdp->sd_rindex_list;
@@ -945,107 +940,30 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
945} 940}
946 941
947/** 942/**
948 * recent_rgrp_first - get first RG from "recent" list
949 * @sdp: The GFS2 superblock
950 * @rglast: address of the rgrp used last
951 *
952 * Returns: The first rgrp in the recent list
953 */
954
955static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
956 u64 rglast)
957{
958 struct gfs2_rgrpd *rgd;
959
960 spin_lock(&sdp->sd_rindex_spin);
961
962 if (rglast) {
963 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
964 if (rgrp_contains_block(rgd, rglast))
965 goto out;
966 }
967 }
968 rgd = NULL;
969 if (!list_empty(&sdp->sd_rindex_recent_list))
970 rgd = list_entry(sdp->sd_rindex_recent_list.next,
971 struct gfs2_rgrpd, rd_recent);
972out:
973 spin_unlock(&sdp->sd_rindex_spin);
974 return rgd;
975}
976
977/**
978 * recent_rgrp_next - get next RG from "recent" list 943 * recent_rgrp_next - get next RG from "recent" list
979 * @cur_rgd: current rgrp 944 * @cur_rgd: current rgrp
980 * @remove:
981 * 945 *
982 * Returns: The next rgrp in the recent list 946 * Returns: The next rgrp in the recent list
983 */ 947 */
984 948
985static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd, 949static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd)
986 int remove)
987{ 950{
988 struct gfs2_sbd *sdp = cur_rgd->rd_sbd; 951 struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
989 struct list_head *head; 952 struct list_head *head;
990 struct gfs2_rgrpd *rgd; 953 struct gfs2_rgrpd *rgd;
991 954
992 spin_lock(&sdp->sd_rindex_spin); 955 spin_lock(&sdp->sd_rindex_spin);
993 956 head = &sdp->sd_rindex_mru_list;
994 head = &sdp->sd_rindex_recent_list; 957 if (unlikely(cur_rgd->rd_list_mru.next == head)) {
995 958 spin_unlock(&sdp->sd_rindex_spin);
996 list_for_each_entry(rgd, head, rd_recent) { 959 return NULL;
997 if (rgd == cur_rgd) {
998 if (cur_rgd->rd_recent.next != head)
999 rgd = list_entry(cur_rgd->rd_recent.next,
1000 struct gfs2_rgrpd, rd_recent);
1001 else
1002 rgd = NULL;
1003
1004 if (remove)
1005 list_del(&cur_rgd->rd_recent);
1006
1007 goto out;
1008 }
1009 } 960 }
1010 961 rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru);
1011 rgd = NULL;
1012 if (!list_empty(head))
1013 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
1014
1015out:
1016 spin_unlock(&sdp->sd_rindex_spin); 962 spin_unlock(&sdp->sd_rindex_spin);
1017 return rgd; 963 return rgd;
1018} 964}
1019 965
1020/** 966/**
1021 * recent_rgrp_add - add an RG to tail of "recent" list
1022 * @new_rgd: The rgrp to add
1023 *
1024 */
1025
1026static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
1027{
1028 struct gfs2_sbd *sdp = new_rgd->rd_sbd;
1029 struct gfs2_rgrpd *rgd;
1030 unsigned int count = 0;
1031 unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);
1032
1033 spin_lock(&sdp->sd_rindex_spin);
1034
1035 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
1036 if (rgd == new_rgd)
1037 goto out;
1038
1039 if (++count >= max)
1040 goto out;
1041 }
1042 list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);
1043
1044out:
1045 spin_unlock(&sdp->sd_rindex_spin);
1046}
1047
1048/**
1049 * forward_rgrp_get - get an rgrp to try next from full list 967 * forward_rgrp_get - get an rgrp to try next from full list
1050 * @sdp: The GFS2 superblock 968 * @sdp: The GFS2 superblock
1051 * 969 *
@@ -1112,9 +1030,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1112 int loops = 0; 1030 int loops = 0;
1113 int error, rg_locked; 1031 int error, rg_locked;
1114 1032
1115 /* Try recently successful rgrps */ 1033 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
1116
1117 rgd = recent_rgrp_first(sdp, ip->i_goal);
1118 1034
1119 while (rgd) { 1035 while (rgd) {
1120 rg_locked = 0; 1036 rg_locked = 0;
@@ -1136,11 +1052,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1136 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1052 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1137 if (inode) 1053 if (inode)
1138 return inode; 1054 return inode;
1139 rgd = recent_rgrp_next(rgd, 1); 1055 /* fall through */
1140 break;
1141
1142 case GLR_TRYFAILED: 1056 case GLR_TRYFAILED:
1143 rgd = recent_rgrp_next(rgd, 0); 1057 rgd = recent_rgrp_next(rgd);
1144 break; 1058 break;
1145 1059
1146 default: 1060 default:
@@ -1199,7 +1113,9 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1199 1113
1200out: 1114out:
1201 if (begin) { 1115 if (begin) {
1202 recent_rgrp_add(rgd); 1116 spin_lock(&sdp->sd_rindex_spin);
1117 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
1118 spin_unlock(&sdp->sd_rindex_spin);
1203 rgd = gfs2_rgrpd_get_next(rgd); 1119 rgd = gfs2_rgrpd_get_next(rgd);
1204 if (!rgd) 1120 if (!rgd)
1205 rgd = gfs2_rgrpd_get_first(sdp); 1121 rgd = gfs2_rgrpd_get_first(sdp);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 7aeacbc65f35..63a8a902d9db 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -65,7 +65,6 @@ void gfs2_tune_init(struct gfs2_tune *gt)
65 gt->gt_quota_quantum = 60; 65 gt->gt_quota_quantum = 60;
66 gt->gt_atime_quantum = 3600; 66 gt->gt_atime_quantum = 3600;
67 gt->gt_new_files_jdata = 0; 67 gt->gt_new_files_jdata = 0;
68 gt->gt_new_files_directio = 0;
69 gt->gt_max_readahead = 1 << 18; 68 gt->gt_max_readahead = 1 << 18;
70 gt->gt_stall_secs = 600; 69 gt->gt_stall_secs = 600;
71 gt->gt_complain_secs = 10; 70 gt->gt_complain_secs = 10;
@@ -941,8 +940,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
941 } 940 }
942 941
943 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, 942 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
944 LM_FLAG_PRIORITY | GL_NOCACHE, 943 GL_NOCACHE, t_gh);
945 t_gh);
946 944
947 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { 945 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
948 error = gfs2_jdesc_check(jd); 946 error = gfs2_jdesc_check(jd);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9ab9fc85ecd0..74846559fc3f 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -110,18 +110,6 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
110 return len; 110 return len;
111} 111}
112 112
113static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
114{
115 if (!capable(CAP_SYS_ADMIN))
116 return -EACCES;
117
118 if (simple_strtol(buf, NULL, 0) != 1)
119 return -EINVAL;
120
121 gfs2_gl_hash_clear(sdp, NO_WAIT);
122 return len;
123}
124
125static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, 113static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
126 size_t len) 114 size_t len)
127{ 115{
@@ -175,7 +163,6 @@ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
175GFS2_ATTR(id, 0444, id_show, NULL); 163GFS2_ATTR(id, 0444, id_show, NULL);
176GFS2_ATTR(fsname, 0444, fsname_show, NULL); 164GFS2_ATTR(fsname, 0444, fsname_show, NULL);
177GFS2_ATTR(freeze, 0644, freeze_show, freeze_store); 165GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
178GFS2_ATTR(shrink, 0200, NULL, shrink_store);
179GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 166GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
180GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store); 167GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
181GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store); 168GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
@@ -186,7 +173,6 @@ static struct attribute *gfs2_attrs[] = {
186 &gfs2_attr_id.attr, 173 &gfs2_attr_id.attr,
187 &gfs2_attr_fsname.attr, 174 &gfs2_attr_fsname.attr,
188 &gfs2_attr_freeze.attr, 175 &gfs2_attr_freeze.attr,
189 &gfs2_attr_shrink.attr,
190 &gfs2_attr_withdraw.attr, 176 &gfs2_attr_withdraw.attr,
191 &gfs2_attr_statfs_sync.attr, 177 &gfs2_attr_statfs_sync.attr,
192 &gfs2_attr_quota_sync.attr, 178 &gfs2_attr_quota_sync.attr,
@@ -426,7 +412,6 @@ TUNE_ATTR(max_readahead, 0);
426TUNE_ATTR(complain_secs, 0); 412TUNE_ATTR(complain_secs, 0);
427TUNE_ATTR(statfs_slow, 0); 413TUNE_ATTR(statfs_slow, 0);
428TUNE_ATTR(new_files_jdata, 0); 414TUNE_ATTR(new_files_jdata, 0);
429TUNE_ATTR(new_files_directio, 0);
430TUNE_ATTR(quota_simul_sync, 1); 415TUNE_ATTR(quota_simul_sync, 1);
431TUNE_ATTR(quota_cache_secs, 1); 416TUNE_ATTR(quota_cache_secs, 1);
432TUNE_ATTR(stall_secs, 1); 417TUNE_ATTR(stall_secs, 1);
@@ -455,7 +440,6 @@ static struct attribute *tune_attrs[] = {
455 &tune_attr_quotad_secs.attr, 440 &tune_attr_quotad_secs.attr,
456 &tune_attr_quota_scale.attr, 441 &tune_attr_quota_scale.attr,
457 &tune_attr_new_files_jdata.attr, 442 &tune_attr_new_files_jdata.attr,
458 &tune_attr_new_files_directio.attr,
459 NULL, 443 NULL,
460}; 444};
461 445
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6914598022ce..91389c8aee8a 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -688,7 +688,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
688 688
689 J_ASSERT(transaction->t_state == T_FINISHED); 689 J_ASSERT(transaction->t_state == T_FINISHED);
690 J_ASSERT(transaction->t_buffers == NULL); 690 J_ASSERT(transaction->t_buffers == NULL);
691 J_ASSERT(transaction->t_sync_datalist == NULL);
692 J_ASSERT(transaction->t_forget == NULL); 691 J_ASSERT(transaction->t_forget == NULL);
693 J_ASSERT(transaction->t_iobuf_list == NULL); 692 J_ASSERT(transaction->t_iobuf_list == NULL);
694 J_ASSERT(transaction->t_shadow_list == NULL); 693 J_ASSERT(transaction->t_shadow_list == NULL);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a2ed72f7ceee..f8b3be873226 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -22,6 +22,8 @@
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/jiffies.h> 23#include <linux/jiffies.h>
24#include <linux/crc32.h> 24#include <linux/crc32.h>
25#include <linux/writeback.h>
26#include <linux/backing-dev.h>
25 27
26/* 28/*
27 * Default IO end handler for temporary BJ_IO buffer_heads. 29 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -37,8 +39,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
37} 39}
38 40
39/* 41/*
40 * When an ext3-ordered file is truncated, it is possible that many pages are 42 * When an ext4 file is truncated, it is possible that some pages are not
41 * not sucessfully freed, because they are attached to a committing transaction. 43 * successfully freed, because they are attached to a committing transaction.
42 * After the transaction commits, these pages are left on the LRU, with no 44 * After the transaction commits, these pages are left on the LRU, with no
43 * ->mapping, and with attached buffers. These pages are trivially reclaimable 45 * ->mapping, and with attached buffers. These pages are trivially reclaimable
44 * by the VM, but their apparent absence upsets the VM accounting, and it makes 46 * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -80,21 +82,6 @@ nope:
80} 82}
81 83
82/* 84/*
83 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
84 * held. For ranking reasons we must trylock. If we lose, schedule away and
85 * return 0. j_list_lock is dropped in this case.
86 */
87static int inverted_lock(journal_t *journal, struct buffer_head *bh)
88{
89 if (!jbd_trylock_bh_state(bh)) {
90 spin_unlock(&journal->j_list_lock);
91 schedule();
92 return 0;
93 }
94 return 1;
95}
96
97/*
98 * Done it all: now submit the commit record. We should have 85 * Done it all: now submit the commit record. We should have
99 * cleaned up our previous buffers by now, so if we are in abort 86 * cleaned up our previous buffers by now, so if we are in abort
100 * mode we can now just skip the rest of the journal write 87 * mode we can now just skip the rest of the journal write
@@ -112,6 +99,7 @@ static int journal_submit_commit_record(journal_t *journal,
112 struct buffer_head *bh; 99 struct buffer_head *bh;
113 int ret; 100 int ret;
114 int barrier_done = 0; 101 int barrier_done = 0;
102 struct timespec now = current_kernel_time();
115 103
116 if (is_journal_aborted(journal)) 104 if (is_journal_aborted(journal))
117 return 0; 105 return 0;
@@ -126,6 +114,8 @@ static int journal_submit_commit_record(journal_t *journal,
126 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 114 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
127 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); 115 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
128 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); 116 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
117 tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
118 tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
129 119
130 if (JBD2_HAS_COMPAT_FEATURE(journal, 120 if (JBD2_HAS_COMPAT_FEATURE(journal,
131 JBD2_FEATURE_COMPAT_CHECKSUM)) { 121 JBD2_FEATURE_COMPAT_CHECKSUM)) {
@@ -197,159 +187,104 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
197} 187}
198 188
199/* 189/*
200 * Wait for all submitted IO to complete. 190 * write the filemap data using writepage() address_space_operations.
191 * We don't do block allocation here even for delalloc. We don't
192 * use writepages() because with dealyed allocation we may be doing
193 * block allocation in writepages().
201 */ 194 */
202static int journal_wait_on_locked_list(journal_t *journal, 195static int journal_submit_inode_data_buffers(struct address_space *mapping)
203 transaction_t *commit_transaction)
204{ 196{
205 int ret = 0; 197 int ret;
206 struct journal_head *jh; 198 struct writeback_control wbc = {
207 199 .sync_mode = WB_SYNC_ALL,
208 while (commit_transaction->t_locked_list) { 200 .nr_to_write = mapping->nrpages * 2,
209 struct buffer_head *bh; 201 .range_start = 0,
210 202 .range_end = i_size_read(mapping->host),
211 jh = commit_transaction->t_locked_list->b_tprev; 203 .for_writepages = 1,
212 bh = jh2bh(jh); 204 };
213 get_bh(bh); 205
214 if (buffer_locked(bh)) { 206 ret = generic_writepages(mapping, &wbc);
215 spin_unlock(&journal->j_list_lock);
216 wait_on_buffer(bh);
217 if (unlikely(!buffer_uptodate(bh)))
218 ret = -EIO;
219 spin_lock(&journal->j_list_lock);
220 }
221 if (!inverted_lock(journal, bh)) {
222 put_bh(bh);
223 spin_lock(&journal->j_list_lock);
224 continue;
225 }
226 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
227 __jbd2_journal_unfile_buffer(jh);
228 jbd_unlock_bh_state(bh);
229 jbd2_journal_remove_journal_head(bh);
230 put_bh(bh);
231 } else {
232 jbd_unlock_bh_state(bh);
233 }
234 put_bh(bh);
235 cond_resched_lock(&journal->j_list_lock);
236 }
237 return ret; 207 return ret;
238 } 208}
239 209
240static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) 210/*
211 * Submit all the data buffers of inode associated with the transaction to
212 * disk.
213 *
214 * We are in a committing transaction. Therefore no new inode can be added to
215 * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
216 * operate on from being released while we write out pages.
217 */
218static int journal_submit_data_buffers(journal_t *journal,
219 transaction_t *commit_transaction)
241{ 220{
242 int i; 221 struct jbd2_inode *jinode;
222 int err, ret = 0;
223 struct address_space *mapping;
243 224
244 for (i = 0; i < bufs; i++) { 225 spin_lock(&journal->j_list_lock);
245 wbuf[i]->b_end_io = end_buffer_write_sync; 226 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
246 /* We use-up our safety reference in submit_bh() */ 227 mapping = jinode->i_vfs_inode->i_mapping;
247 submit_bh(WRITE, wbuf[i]); 228 jinode->i_flags |= JI_COMMIT_RUNNING;
229 spin_unlock(&journal->j_list_lock);
230 /*
231 * submit the inode data buffers. We use writepage
232 * instead of writepages. Because writepages can do
233 * block allocation with delalloc. We need to write
234 * only allocated blocks here.
235 */
236 err = journal_submit_inode_data_buffers(mapping);
237 if (!ret)
238 ret = err;
239 spin_lock(&journal->j_list_lock);
240 J_ASSERT(jinode->i_transaction == commit_transaction);
241 jinode->i_flags &= ~JI_COMMIT_RUNNING;
242 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
248 } 243 }
244 spin_unlock(&journal->j_list_lock);
245 return ret;
249} 246}
250 247
251/* 248/*
252 * Submit all the data buffers to disk 249 * Wait for data submitted for writeout, refile inodes to proper
250 * transaction if needed.
251 *
253 */ 252 */
254static void journal_submit_data_buffers(journal_t *journal, 253static int journal_finish_inode_data_buffers(journal_t *journal,
255 transaction_t *commit_transaction) 254 transaction_t *commit_transaction)
256{ 255{
257 struct journal_head *jh; 256 struct jbd2_inode *jinode, *next_i;
258 struct buffer_head *bh; 257 int err, ret = 0;
259 int locked;
260 int bufs = 0;
261 struct buffer_head **wbuf = journal->j_wbuf;
262 258
263 /* 259 /* For locking, see the comment in journal_submit_data_buffers() */
264 * Whenever we unlock the journal and sleep, things can get added
265 * onto ->t_sync_datalist, so we have to keep looping back to
266 * write_out_data until we *know* that the list is empty.
267 *
268 * Cleanup any flushed data buffers from the data list. Even in
269 * abort mode, we want to flush this out as soon as possible.
270 */
271write_out_data:
272 cond_resched();
273 spin_lock(&journal->j_list_lock); 260 spin_lock(&journal->j_list_lock);
261 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
262 jinode->i_flags |= JI_COMMIT_RUNNING;
263 spin_unlock(&journal->j_list_lock);
264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
265 if (!ret)
266 ret = err;
267 spin_lock(&journal->j_list_lock);
268 jinode->i_flags &= ~JI_COMMIT_RUNNING;
269 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
270 }
274 271
275 while (commit_transaction->t_sync_datalist) { 272 /* Now refile inode to proper lists */
276 jh = commit_transaction->t_sync_datalist; 273 list_for_each_entry_safe(jinode, next_i,
277 bh = jh2bh(jh); 274 &commit_transaction->t_inode_list, i_list) {
278 locked = 0; 275 list_del(&jinode->i_list);
279 276 if (jinode->i_next_transaction) {
280 /* Get reference just to make sure buffer does not disappear 277 jinode->i_transaction = jinode->i_next_transaction;
281 * when we are forced to drop various locks */ 278 jinode->i_next_transaction = NULL;
282 get_bh(bh); 279 list_add(&jinode->i_list,
283 /* If the buffer is dirty, we need to submit IO and hence 280 &jinode->i_transaction->t_inode_list);
284 * we need the buffer lock. We try to lock the buffer without
285 * blocking. If we fail, we need to drop j_list_lock and do
286 * blocking lock_buffer().
287 */
288 if (buffer_dirty(bh)) {
289 if (test_set_buffer_locked(bh)) {
290 BUFFER_TRACE(bh, "needs blocking lock");
291 spin_unlock(&journal->j_list_lock);
292 /* Write out all data to prevent deadlocks */
293 journal_do_submit_data(wbuf, bufs);
294 bufs = 0;
295 lock_buffer(bh);
296 spin_lock(&journal->j_list_lock);
297 }
298 locked = 1;
299 }
300 /* We have to get bh_state lock. Again out of order, sigh. */
301 if (!inverted_lock(journal, bh)) {
302 jbd_lock_bh_state(bh);
303 spin_lock(&journal->j_list_lock);
304 }
305 /* Someone already cleaned up the buffer? */
306 if (!buffer_jbd(bh)
307 || jh->b_transaction != commit_transaction
308 || jh->b_jlist != BJ_SyncData) {
309 jbd_unlock_bh_state(bh);
310 if (locked)
311 unlock_buffer(bh);
312 BUFFER_TRACE(bh, "already cleaned up");
313 put_bh(bh);
314 continue;
315 }
316 if (locked && test_clear_buffer_dirty(bh)) {
317 BUFFER_TRACE(bh, "needs writeout, adding to array");
318 wbuf[bufs++] = bh;
319 __jbd2_journal_file_buffer(jh, commit_transaction,
320 BJ_Locked);
321 jbd_unlock_bh_state(bh);
322 if (bufs == journal->j_wbufsize) {
323 spin_unlock(&journal->j_list_lock);
324 journal_do_submit_data(wbuf, bufs);
325 bufs = 0;
326 goto write_out_data;
327 }
328 } else if (!locked && buffer_locked(bh)) {
329 __jbd2_journal_file_buffer(jh, commit_transaction,
330 BJ_Locked);
331 jbd_unlock_bh_state(bh);
332 put_bh(bh);
333 } else { 281 } else {
334 BUFFER_TRACE(bh, "writeout complete: unfile"); 282 jinode->i_transaction = NULL;
335 __jbd2_journal_unfile_buffer(jh);
336 jbd_unlock_bh_state(bh);
337 if (locked)
338 unlock_buffer(bh);
339 jbd2_journal_remove_journal_head(bh);
340 /* Once for our safety reference, once for
341 * jbd2_journal_remove_journal_head() */
342 put_bh(bh);
343 put_bh(bh);
344 }
345
346 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
347 spin_unlock(&journal->j_list_lock);
348 goto write_out_data;
349 } 283 }
350 } 284 }
351 spin_unlock(&journal->j_list_lock); 285 spin_unlock(&journal->j_list_lock);
352 journal_do_submit_data(wbuf, bufs); 286
287 return ret;
353} 288}
354 289
355static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) 290static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
@@ -524,21 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
524 * Now start flushing things to disk, in the order they appear 459 * Now start flushing things to disk, in the order they appear
525 * on the transaction lists. Data blocks go first. 460 * on the transaction lists. Data blocks go first.
526 */ 461 */
527 err = 0; 462 err = journal_submit_data_buffers(journal, commit_transaction);
528 journal_submit_data_buffers(journal, commit_transaction);
529
530 /*
531 * Wait for all previously submitted IO to complete if commit
532 * record is to be written synchronously.
533 */
534 spin_lock(&journal->j_list_lock);
535 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
536 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
537 err = journal_wait_on_locked_list(journal,
538 commit_transaction);
539
540 spin_unlock(&journal->j_list_lock);
541
542 if (err) 463 if (err)
543 jbd2_journal_abort(journal, err); 464 jbd2_journal_abort(journal, err);
544 465
@@ -547,16 +468,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
547 jbd_debug(3, "JBD: commit phase 2\n"); 468 jbd_debug(3, "JBD: commit phase 2\n");
548 469
549 /* 470 /*
550 * If we found any dirty or locked buffers, then we should have
551 * looped back up to the write_out_data label. If there weren't
552 * any then journal_clean_data_list should have wiped the list
553 * clean by now, so check that it is in fact empty.
554 */
555 J_ASSERT (commit_transaction->t_sync_datalist == NULL);
556
557 jbd_debug (3, "JBD: commit phase 3\n");
558
559 /*
560 * Way to go: we have now written out all of the data for a 471 * Way to go: we have now written out all of the data for a
561 * transaction! Now comes the tricky part: we need to write out 472 * transaction! Now comes the tricky part: we need to write out
562 * metadata. Loop over the transaction's entire buffer list: 473 * metadata. Loop over the transaction's entire buffer list:
@@ -574,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
574 J_ASSERT(commit_transaction->t_nr_buffers <= 485 J_ASSERT(commit_transaction->t_nr_buffers <=
575 commit_transaction->t_outstanding_credits); 486 commit_transaction->t_outstanding_credits);
576 487
488 err = 0;
577 descriptor = NULL; 489 descriptor = NULL;
578 bufs = 0; 490 bufs = 0;
579 while (commit_transaction->t_buffers) { 491 while (commit_transaction->t_buffers) {
@@ -748,15 +660,19 @@ start_journal_io:
748 &cbh, crc32_sum); 660 &cbh, crc32_sum);
749 if (err) 661 if (err)
750 __jbd2_journal_abort_hard(journal); 662 __jbd2_journal_abort_hard(journal);
751
752 spin_lock(&journal->j_list_lock);
753 err = journal_wait_on_locked_list(journal,
754 commit_transaction);
755 spin_unlock(&journal->j_list_lock);
756 if (err)
757 __jbd2_journal_abort_hard(journal);
758 } 663 }
759 664
665 /*
666 * This is the right place to wait for data buffers both for ASYNC
667 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
668 * the commit block went to disk (which happens above). If commit is
669 * SYNC, we need to wait for data buffers before we start writing
670 * commit block, which happens below in such setting.
671 */
672 err = journal_finish_inode_data_buffers(journal, commit_transaction);
673 if (err)
674 jbd2_journal_abort(journal, err);
675
760 /* Lo and behold: we have just managed to send a transaction to 676 /* Lo and behold: we have just managed to send a transaction to
761 the log. Before we can commit it, wait for the IO so far to 677 the log. Before we can commit it, wait for the IO so far to
762 complete. Control buffers being written are on the 678 complete. Control buffers being written are on the
@@ -768,7 +684,7 @@ start_journal_io:
768 so we incur less scheduling load. 684 so we incur less scheduling load.
769 */ 685 */
770 686
771 jbd_debug(3, "JBD: commit phase 4\n"); 687 jbd_debug(3, "JBD: commit phase 3\n");
772 688
773 /* 689 /*
774 * akpm: these are BJ_IO, and j_list_lock is not needed. 690 * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -827,7 +743,7 @@ wait_for_iobuf:
827 743
828 J_ASSERT (commit_transaction->t_shadow_list == NULL); 744 J_ASSERT (commit_transaction->t_shadow_list == NULL);
829 745
830 jbd_debug(3, "JBD: commit phase 5\n"); 746 jbd_debug(3, "JBD: commit phase 4\n");
831 747
832 /* Here we wait for the revoke record and descriptor record buffers */ 748 /* Here we wait for the revoke record and descriptor record buffers */
833 wait_for_ctlbuf: 749 wait_for_ctlbuf:
@@ -854,7 +770,7 @@ wait_for_iobuf:
854 /* AKPM: bforget here */ 770 /* AKPM: bforget here */
855 } 771 }
856 772
857 jbd_debug(3, "JBD: commit phase 6\n"); 773 jbd_debug(3, "JBD: commit phase 5\n");
858 774
859 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 775 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
860 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 776 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -874,9 +790,9 @@ wait_for_iobuf:
874 transaction can be removed from any checkpoint list it was on 790 transaction can be removed from any checkpoint list it was on
875 before. */ 791 before. */
876 792
877 jbd_debug(3, "JBD: commit phase 7\n"); 793 jbd_debug(3, "JBD: commit phase 6\n");
878 794
879 J_ASSERT(commit_transaction->t_sync_datalist == NULL); 795 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
880 J_ASSERT(commit_transaction->t_buffers == NULL); 796 J_ASSERT(commit_transaction->t_buffers == NULL);
881 J_ASSERT(commit_transaction->t_checkpoint_list == NULL); 797 J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
882 J_ASSERT(commit_transaction->t_iobuf_list == NULL); 798 J_ASSERT(commit_transaction->t_iobuf_list == NULL);
@@ -997,7 +913,7 @@ restart_loop:
997 913
998 /* Done with this transaction! */ 914 /* Done with this transaction! */
999 915
1000 jbd_debug(3, "JBD: commit phase 8\n"); 916 jbd_debug(3, "JBD: commit phase 7\n");
1001 917
1002 J_ASSERT(commit_transaction->t_state == T_COMMIT); 918 J_ASSERT(commit_transaction->t_state == T_COMMIT);
1003 919
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 2e24567c4a79..b26c6d9fe6ae 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -50,7 +50,6 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates);
50EXPORT_SYMBOL(jbd2_journal_get_write_access); 50EXPORT_SYMBOL(jbd2_journal_get_write_access);
51EXPORT_SYMBOL(jbd2_journal_get_create_access); 51EXPORT_SYMBOL(jbd2_journal_get_create_access);
52EXPORT_SYMBOL(jbd2_journal_get_undo_access); 52EXPORT_SYMBOL(jbd2_journal_get_undo_access);
53EXPORT_SYMBOL(jbd2_journal_dirty_data);
54EXPORT_SYMBOL(jbd2_journal_dirty_metadata); 53EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
55EXPORT_SYMBOL(jbd2_journal_release_buffer); 54EXPORT_SYMBOL(jbd2_journal_release_buffer);
56EXPORT_SYMBOL(jbd2_journal_forget); 55EXPORT_SYMBOL(jbd2_journal_forget);
@@ -82,6 +81,10 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
82EXPORT_SYMBOL(jbd2_journal_invalidatepage); 81EXPORT_SYMBOL(jbd2_journal_invalidatepage);
83EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); 82EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
84EXPORT_SYMBOL(jbd2_journal_force_commit); 83EXPORT_SYMBOL(jbd2_journal_force_commit);
84EXPORT_SYMBOL(jbd2_journal_file_inode);
85EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
86EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
87EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
85 88
86static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 89static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
87static void __journal_abort_soft (journal_t *journal, int errno); 90static void __journal_abort_soft (journal_t *journal, int errno);
@@ -2195,6 +2198,54 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
2195} 2198}
2196 2199
2197/* 2200/*
2201 * Initialize jbd inode head
2202 */
2203void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
2204{
2205 jinode->i_transaction = NULL;
2206 jinode->i_next_transaction = NULL;
2207 jinode->i_vfs_inode = inode;
2208 jinode->i_flags = 0;
2209 INIT_LIST_HEAD(&jinode->i_list);
2210}
2211
2212/*
2213 * Function to be called before we start removing inode from memory (i.e.,
2214 * clear_inode() is a fine place to be called from). It removes inode from
2215 * transaction's lists.
2216 */
2217void jbd2_journal_release_jbd_inode(journal_t *journal,
2218 struct jbd2_inode *jinode)
2219{
2220 int writeout = 0;
2221
2222 if (!journal)
2223 return;
2224restart:
2225 spin_lock(&journal->j_list_lock);
2226 /* Is commit writing out inode - we have to wait */
2227 if (jinode->i_flags & JI_COMMIT_RUNNING) {
2228 wait_queue_head_t *wq;
2229 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
2230 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
2231 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
2232 spin_unlock(&journal->j_list_lock);
2233 schedule();
2234 finish_wait(wq, &wait.wait);
2235 goto restart;
2236 }
2237
2238 /* Do we need to wait for data writeback? */
2239 if (journal->j_committing_transaction == jinode->i_transaction)
2240 writeout = 1;
2241 if (jinode->i_transaction) {
2242 list_del(&jinode->i_list);
2243 jinode->i_transaction = NULL;
2244 }
2245 spin_unlock(&journal->j_list_lock);
2246}
2247
2248/*
2198 * debugfs tunables 2249 * debugfs tunables
2199 */ 2250 */
2200#ifdef CONFIG_JBD2_DEBUG 2251#ifdef CONFIG_JBD2_DEBUG
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d6e006e67804..4f7cadbb19fa 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -41,7 +41,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
41 * new transaction and we can't block without protecting against other 41 * new transaction and we can't block without protecting against other
42 * processes trying to touch the journal while it is in transition. 42 * processes trying to touch the journal while it is in transition.
43 * 43 *
44 * Called under j_state_lock
45 */ 44 */
46 45
47static transaction_t * 46static transaction_t *
@@ -52,6 +51,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
52 transaction->t_tid = journal->j_transaction_sequence++; 51 transaction->t_tid = journal->j_transaction_sequence++;
53 transaction->t_expires = jiffies + journal->j_commit_interval; 52 transaction->t_expires = jiffies + journal->j_commit_interval;
54 spin_lock_init(&transaction->t_handle_lock); 53 spin_lock_init(&transaction->t_handle_lock);
54 INIT_LIST_HEAD(&transaction->t_inode_list);
55 55
56 /* Set up the commit timer for the new transaction. */ 56 /* Set up the commit timer for the new transaction. */
57 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); 57 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
@@ -943,183 +943,6 @@ out:
943} 943}
944 944
945/** 945/**
946 * int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which
947 * needs to be flushed before we can commit the
948 * current transaction.
949 * @handle: transaction
950 * @bh: bufferhead to mark
951 *
952 * The buffer is placed on the transaction's data list and is marked as
953 * belonging to the transaction.
954 *
955 * Returns error number or 0 on success.
956 *
957 * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage
958 * by kswapd.
959 */
960int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
961{
962 journal_t *journal = handle->h_transaction->t_journal;
963 int need_brelse = 0;
964 struct journal_head *jh;
965
966 if (is_handle_aborted(handle))
967 return 0;
968
969 jh = jbd2_journal_add_journal_head(bh);
970 JBUFFER_TRACE(jh, "entry");
971
972 /*
973 * The buffer could *already* be dirty. Writeout can start
974 * at any time.
975 */
976 jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);
977
978 /*
979 * What if the buffer is already part of a running transaction?
980 *
981 * There are two cases:
982 * 1) It is part of the current running transaction. Refile it,
983 * just in case we have allocated it as metadata, deallocated
984 * it, then reallocated it as data.
985 * 2) It is part of the previous, still-committing transaction.
986 * If all we want to do is to guarantee that the buffer will be
987 * written to disk before this new transaction commits, then
988 * being sure that the *previous* transaction has this same
989 * property is sufficient for us! Just leave it on its old
990 * transaction.
991 *
992 * In case (2), the buffer must not already exist as metadata
993 * --- that would violate write ordering (a transaction is free
994 * to write its data at any point, even before the previous
995 * committing transaction has committed). The caller must
996 * never, ever allow this to happen: there's nothing we can do
997 * about it in this layer.
998 */
999 jbd_lock_bh_state(bh);
1000 spin_lock(&journal->j_list_lock);
1001
1002 /* Now that we have bh_state locked, are we really still mapped? */
1003 if (!buffer_mapped(bh)) {
1004 JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
1005 goto no_journal;
1006 }
1007
1008 if (jh->b_transaction) {
1009 JBUFFER_TRACE(jh, "has transaction");
1010 if (jh->b_transaction != handle->h_transaction) {
1011 JBUFFER_TRACE(jh, "belongs to older transaction");
1012 J_ASSERT_JH(jh, jh->b_transaction ==
1013 journal->j_committing_transaction);
1014
1015 /* @@@ IS THIS TRUE ? */
1016 /*
1017 * Not any more. Scenario: someone does a write()
1018 * in data=journal mode. The buffer's transaction has
1019 * moved into commit. Then someone does another
1020 * write() to the file. We do the frozen data copyout
1021 * and set b_next_transaction to point to j_running_t.
1022 * And while we're in that state, someone does a
1023 * writepage() in an attempt to pageout the same area
1024 * of the file via a shared mapping. At present that
1025 * calls jbd2_journal_dirty_data(), and we get right here.
1026 * It may be too late to journal the data. Simply
1027 * falling through to the next test will suffice: the
1028 * data will be dirty and wil be checkpointed. The
1029 * ordering comments in the next comment block still
1030 * apply.
1031 */
1032 //J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1033
1034 /*
1035 * If we're journalling data, and this buffer was
1036 * subject to a write(), it could be metadata, forget
1037 * or shadow against the committing transaction. Now,
1038 * someone has dirtied the same darn page via a mapping
1039 * and it is being writepage()'d.
1040 * We *could* just steal the page from commit, with some
1041 * fancy locking there. Instead, we just skip it -
1042 * don't tie the page's buffers to the new transaction
1043 * at all.
1044 * Implication: if we crash before the writepage() data
1045 * is written into the filesystem, recovery will replay
1046 * the write() data.
1047 */
1048 if (jh->b_jlist != BJ_None &&
1049 jh->b_jlist != BJ_SyncData &&
1050 jh->b_jlist != BJ_Locked) {
1051 JBUFFER_TRACE(jh, "Not stealing");
1052 goto no_journal;
1053 }
1054
1055 /*
1056 * This buffer may be undergoing writeout in commit. We
1057 * can't return from here and let the caller dirty it
1058 * again because that can cause the write-out loop in
1059 * commit to never terminate.
1060 */
1061 if (buffer_dirty(bh)) {
1062 get_bh(bh);
1063 spin_unlock(&journal->j_list_lock);
1064 jbd_unlock_bh_state(bh);
1065 need_brelse = 1;
1066 sync_dirty_buffer(bh);
1067 jbd_lock_bh_state(bh);
1068 spin_lock(&journal->j_list_lock);
1069 /* Since we dropped the lock... */
1070 if (!buffer_mapped(bh)) {
1071 JBUFFER_TRACE(jh, "buffer got unmapped");
1072 goto no_journal;
1073 }
1074 /* The buffer may become locked again at any
1075 time if it is redirtied */
1076 }
1077
1078 /* journal_clean_data_list() may have got there first */
1079 if (jh->b_transaction != NULL) {
1080 JBUFFER_TRACE(jh, "unfile from commit");
1081 __jbd2_journal_temp_unlink_buffer(jh);
1082 /* It still points to the committing
1083 * transaction; move it to this one so
1084 * that the refile assert checks are
1085 * happy. */
1086 jh->b_transaction = handle->h_transaction;
1087 }
1088 /* The buffer will be refiled below */
1089
1090 }
1091 /*
1092 * Special case --- the buffer might actually have been
1093 * allocated and then immediately deallocated in the previous,
1094 * committing transaction, so might still be left on that
1095 * transaction's metadata lists.
1096 */
1097 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
1098 JBUFFER_TRACE(jh, "not on correct data list: unfile");
1099 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
1100 __jbd2_journal_temp_unlink_buffer(jh);
1101 jh->b_transaction = handle->h_transaction;
1102 JBUFFER_TRACE(jh, "file as data");
1103 __jbd2_journal_file_buffer(jh, handle->h_transaction,
1104 BJ_SyncData);
1105 }
1106 } else {
1107 JBUFFER_TRACE(jh, "not on a transaction");
1108 __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
1109 }
1110no_journal:
1111 spin_unlock(&journal->j_list_lock);
1112 jbd_unlock_bh_state(bh);
1113 if (need_brelse) {
1114 BUFFER_TRACE(bh, "brelse");
1115 __brelse(bh);
1116 }
1117 JBUFFER_TRACE(jh, "exit");
1118 jbd2_journal_put_journal_head(jh);
1119 return 0;
1120}
1121
1122/**
1123 * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata 946 * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
1124 * @handle: transaction to add buffer to. 947 * @handle: transaction to add buffer to.
1125 * @bh: buffer to mark 948 * @bh: buffer to mark
@@ -1541,10 +1364,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1541 * Remove a buffer from the appropriate transaction list. 1364 * Remove a buffer from the appropriate transaction list.
1542 * 1365 *
1543 * Note that this function can *change* the value of 1366 * Note that this function can *change* the value of
1544 * bh->b_transaction->t_sync_datalist, t_buffers, t_forget, 1367 * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
1545 * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller 1368 * t_log_list or t_reserved_list. If the caller is holding onto a copy of one
1546 * is holding onto a copy of one of thee pointers, it could go bad. 1369 * of these pointers, it could go bad. Generally the caller needs to re-read
1547 * Generally the caller needs to re-read the pointer from the transaction_t. 1370 * the pointer from the transaction_t.
1548 * 1371 *
1549 * Called under j_list_lock. The journal may not be locked. 1372 * Called under j_list_lock. The journal may not be locked.
1550 */ 1373 */
@@ -1566,9 +1389,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1566 switch (jh->b_jlist) { 1389 switch (jh->b_jlist) {
1567 case BJ_None: 1390 case BJ_None:
1568 return; 1391 return;
1569 case BJ_SyncData:
1570 list = &transaction->t_sync_datalist;
1571 break;
1572 case BJ_Metadata: 1392 case BJ_Metadata:
1573 transaction->t_nr_buffers--; 1393 transaction->t_nr_buffers--;
1574 J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); 1394 J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
@@ -1589,9 +1409,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1589 case BJ_Reserved: 1409 case BJ_Reserved:
1590 list = &transaction->t_reserved_list; 1410 list = &transaction->t_reserved_list;
1591 break; 1411 break;
1592 case BJ_Locked:
1593 list = &transaction->t_locked_list;
1594 break;
1595 } 1412 }
1596 1413
1597 __blist_del_buffer(list, jh); 1414 __blist_del_buffer(list, jh);
@@ -1634,15 +1451,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1634 goto out; 1451 goto out;
1635 1452
1636 spin_lock(&journal->j_list_lock); 1453 spin_lock(&journal->j_list_lock);
1637 if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { 1454 if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1638 if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
1639 /* A written-back ordered data buffer */
1640 JBUFFER_TRACE(jh, "release data");
1641 __jbd2_journal_unfile_buffer(jh);
1642 jbd2_journal_remove_journal_head(bh);
1643 __brelse(bh);
1644 }
1645 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1646 /* written-back checkpointed metadata buffer */ 1455 /* written-back checkpointed metadata buffer */
1647 if (jh->b_jlist == BJ_None) { 1456 if (jh->b_jlist == BJ_None) {
1648 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1457 JBUFFER_TRACE(jh, "remove from checkpoint list");
@@ -1656,12 +1465,43 @@ out:
1656 return; 1465 return;
1657} 1466}
1658 1467
1468/*
1469 * jbd2_journal_try_to_free_buffers() could race with
1470 * jbd2_journal_commit_transaction(). The later might still hold the
1471 * reference count to the buffers when inspecting them on
1472 * t_syncdata_list or t_locked_list.
1473 *
1474 * jbd2_journal_try_to_free_buffers() will call this function to
1475 * wait for the current transaction to finish syncing data buffers, before
1476 * try to free that buffer.
1477 *
1478 * Called with journal->j_state_lock hold.
1479 */
1480static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
1481{
1482 transaction_t *transaction;
1483 tid_t tid;
1484
1485 spin_lock(&journal->j_state_lock);
1486 transaction = journal->j_committing_transaction;
1487
1488 if (!transaction) {
1489 spin_unlock(&journal->j_state_lock);
1490 return;
1491 }
1492
1493 tid = transaction->t_tid;
1494 spin_unlock(&journal->j_state_lock);
1495 jbd2_log_wait_commit(journal, tid);
1496}
1659 1497
1660/** 1498/**
1661 * int jbd2_journal_try_to_free_buffers() - try to free page buffers. 1499 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
1662 * @journal: journal for operation 1500 * @journal: journal for operation
1663 * @page: to try and free 1501 * @page: to try and free
1664 * @unused_gfp_mask: unused 1502 * @gfp_mask: we use the mask to detect how hard should we try to release
1503 * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
1504 * release the buffers.
1665 * 1505 *
1666 * 1506 *
1667 * For all the buffers on this page, 1507 * For all the buffers on this page,
@@ -1690,9 +1530,11 @@ out:
1690 * journal_try_to_free_buffer() is changing its state. But that 1530 * journal_try_to_free_buffer() is changing its state. But that
1691 * cannot happen because we never reallocate freed data as metadata 1531 * cannot happen because we never reallocate freed data as metadata
1692 * while the data is part of a transaction. Yes? 1532 * while the data is part of a transaction. Yes?
1533 *
1534 * Return 0 on failure, 1 on success
1693 */ 1535 */
1694int jbd2_journal_try_to_free_buffers(journal_t *journal, 1536int jbd2_journal_try_to_free_buffers(journal_t *journal,
1695 struct page *page, gfp_t unused_gfp_mask) 1537 struct page *page, gfp_t gfp_mask)
1696{ 1538{
1697 struct buffer_head *head; 1539 struct buffer_head *head;
1698 struct buffer_head *bh; 1540 struct buffer_head *bh;
@@ -1708,7 +1550,8 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1708 /* 1550 /*
1709 * We take our own ref against the journal_head here to avoid 1551 * We take our own ref against the journal_head here to avoid
1710 * having to add tons of locking around each instance of 1552 * having to add tons of locking around each instance of
1711 * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head(). 1553 * jbd2_journal_remove_journal_head() and
1554 * jbd2_journal_put_journal_head().
1712 */ 1555 */
1713 jh = jbd2_journal_grab_journal_head(bh); 1556 jh = jbd2_journal_grab_journal_head(bh);
1714 if (!jh) 1557 if (!jh)
@@ -1721,7 +1564,28 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1721 if (buffer_jbd(bh)) 1564 if (buffer_jbd(bh))
1722 goto busy; 1565 goto busy;
1723 } while ((bh = bh->b_this_page) != head); 1566 } while ((bh = bh->b_this_page) != head);
1567
1724 ret = try_to_free_buffers(page); 1568 ret = try_to_free_buffers(page);
1569
1570 /*
1571 * There are a number of places where jbd2_journal_try_to_free_buffers()
1572 * could race with jbd2_journal_commit_transaction(), the later still
1573 * holds the reference to the buffers to free while processing them.
1574 * try_to_free_buffers() failed to free those buffers. Some of the
1575 * caller of releasepage() request page buffers to be dropped, otherwise
1576 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1577 *
1578 * So, if the caller of try_to_release_page() wants the synchronous
1579 * behaviour(i.e make sure buffers are dropped upon return),
1580 * let's wait for the current transaction to finish flush of
1581 * dirty data buffers, then try to free those buffers again,
1582 * with the journal locked.
1583 */
1584 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1585 jbd2_journal_wait_for_transaction_sync_data(journal);
1586 ret = try_to_free_buffers(page);
1587 }
1588
1725busy: 1589busy:
1726 return ret; 1590 return ret;
1727} 1591}
@@ -1823,6 +1687,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1823 if (!buffer_jbd(bh)) 1687 if (!buffer_jbd(bh))
1824 goto zap_buffer_unlocked; 1688 goto zap_buffer_unlocked;
1825 1689
1690 /* OK, we have data buffer in journaled mode */
1826 spin_lock(&journal->j_state_lock); 1691 spin_lock(&journal->j_state_lock);
1827 jbd_lock_bh_state(bh); 1692 jbd_lock_bh_state(bh);
1828 spin_lock(&journal->j_list_lock); 1693 spin_lock(&journal->j_list_lock);
@@ -1886,15 +1751,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1886 } 1751 }
1887 } else if (transaction == journal->j_committing_transaction) { 1752 } else if (transaction == journal->j_committing_transaction) {
1888 JBUFFER_TRACE(jh, "on committing transaction"); 1753 JBUFFER_TRACE(jh, "on committing transaction");
1889 if (jh->b_jlist == BJ_Locked) {
1890 /*
1891 * The buffer is on the committing transaction's locked
1892 * list. We have the buffer locked, so I/O has
1893 * completed. So we can nail the buffer now.
1894 */
1895 may_free = __dispose_buffer(jh, transaction);
1896 goto zap_buffer;
1897 }
1898 /* 1754 /*
1899 * If it is committing, we simply cannot touch it. We 1755 * If it is committing, we simply cannot touch it. We
1900 * can remove it's next_transaction pointer from the 1756 * can remove it's next_transaction pointer from the
@@ -2027,9 +1883,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
2027 J_ASSERT_JH(jh, !jh->b_committed_data); 1883 J_ASSERT_JH(jh, !jh->b_committed_data);
2028 J_ASSERT_JH(jh, !jh->b_frozen_data); 1884 J_ASSERT_JH(jh, !jh->b_frozen_data);
2029 return; 1885 return;
2030 case BJ_SyncData:
2031 list = &transaction->t_sync_datalist;
2032 break;
2033 case BJ_Metadata: 1886 case BJ_Metadata:
2034 transaction->t_nr_buffers++; 1887 transaction->t_nr_buffers++;
2035 list = &transaction->t_buffers; 1888 list = &transaction->t_buffers;
@@ -2049,9 +1902,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
2049 case BJ_Reserved: 1902 case BJ_Reserved:
2050 list = &transaction->t_reserved_list; 1903 list = &transaction->t_reserved_list;
2051 break; 1904 break;
2052 case BJ_Locked:
2053 list = &transaction->t_locked_list;
2054 break;
2055 } 1905 }
2056 1906
2057 __blist_add_buffer(list, jh); 1907 __blist_add_buffer(list, jh);
@@ -2141,3 +1991,88 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2141 spin_unlock(&journal->j_list_lock); 1991 spin_unlock(&journal->j_list_lock);
2142 __brelse(bh); 1992 __brelse(bh);
2143} 1993}
1994
1995/*
1996 * File inode in the inode list of the handle's transaction
1997 */
1998int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
1999{
2000 transaction_t *transaction = handle->h_transaction;
2001 journal_t *journal = transaction->t_journal;
2002
2003 if (is_handle_aborted(handle))
2004 return -EIO;
2005
2006 jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
2007 transaction->t_tid);
2008
2009 /*
2010 * First check whether inode isn't already on the transaction's
2011 * lists without taking the lock. Note that this check is safe
2012 * without the lock as we cannot race with somebody removing inode
2013 * from the transaction. The reason is that we remove inode from the
2014 * transaction only in journal_release_jbd_inode() and when we commit
2015 * the transaction. We are guarded from the first case by holding
2016 * a reference to the inode. We are safe against the second case
2017 * because if jinode->i_transaction == transaction, commit code
2018 * cannot touch the transaction because we hold reference to it,
2019 * and if jinode->i_next_transaction == transaction, commit code
2020 * will only file the inode where we want it.
2021 */
2022 if (jinode->i_transaction == transaction ||
2023 jinode->i_next_transaction == transaction)
2024 return 0;
2025
2026 spin_lock(&journal->j_list_lock);
2027
2028 if (jinode->i_transaction == transaction ||
2029 jinode->i_next_transaction == transaction)
2030 goto done;
2031
2032 /* On some different transaction's list - should be
2033 * the committing one */
2034 if (jinode->i_transaction) {
2035 J_ASSERT(jinode->i_next_transaction == NULL);
2036 J_ASSERT(jinode->i_transaction ==
2037 journal->j_committing_transaction);
2038 jinode->i_next_transaction = transaction;
2039 goto done;
2040 }
2041 /* Not on any transaction list... */
2042 J_ASSERT(!jinode->i_next_transaction);
2043 jinode->i_transaction = transaction;
2044 list_add(&jinode->i_list, &transaction->t_inode_list);
2045done:
2046 spin_unlock(&journal->j_list_lock);
2047
2048 return 0;
2049}
2050
2051/*
2052 * This function must be called when inode is journaled in ordered mode
2053 * before truncation happens. It starts writeout of truncated part in
2054 * case it is in the committing transaction so that we stand to ordered
2055 * mode consistency guarantees.
2056 */
2057int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
2058 loff_t new_size)
2059{
2060 journal_t *journal;
2061 transaction_t *commit_trans;
2062 int ret = 0;
2063
2064 if (!inode->i_transaction && !inode->i_next_transaction)
2065 goto out;
2066 journal = inode->i_transaction->t_journal;
2067 spin_lock(&journal->j_state_lock);
2068 commit_trans = journal->j_committing_transaction;
2069 spin_unlock(&journal->j_state_lock);
2070 if (inode->i_transaction == commit_trans) {
2071 ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
2072 new_size, LLONG_MAX);
2073 if (ret)
2074 jbd2_journal_abort(journal, ret);
2075 }
2076out:
2077 return ret;
2078}
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index bf6ab19b86ee..6a73de84bcef 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -21,6 +21,7 @@
21#include <linux/ctype.h> 21#include <linux/ctype.h>
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/proc_fs.h> 23#include <linux/proc_fs.h>
24#include <linux/seq_file.h>
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25#include "jfs_incore.h" 26#include "jfs_incore.h"
26#include "jfs_filsys.h" 27#include "jfs_filsys.h"
@@ -30,29 +31,19 @@
30 31
31static struct proc_dir_entry *base; 32static struct proc_dir_entry *base;
32#ifdef CONFIG_JFS_DEBUG 33#ifdef CONFIG_JFS_DEBUG
33static int loglevel_read(char *page, char **start, off_t off, 34static int jfs_loglevel_proc_show(struct seq_file *m, void *v)
34 int count, int *eof, void *data)
35{ 35{
36 int len; 36 seq_printf(m, "%d\n", jfsloglevel);
37 37 return 0;
38 len = sprintf(page, "%d\n", jfsloglevel); 38}
39
40 len -= off;
41 *start = page + off;
42
43 if (len > count)
44 len = count;
45 else
46 *eof = 1;
47
48 if (len < 0)
49 len = 0;
50 39
51 return len; 40static int jfs_loglevel_proc_open(struct inode *inode, struct file *file)
41{
42 return single_open(file, jfs_loglevel_proc_show, NULL);
52} 43}
53 44
54static int loglevel_write(struct file *file, const char __user *buffer, 45static ssize_t jfs_loglevel_proc_write(struct file *file,
55 unsigned long count, void *data) 46 const char __user *buffer, size_t count, loff_t *ppos)
56{ 47{
57 char c; 48 char c;
58 49
@@ -65,22 +56,30 @@ static int loglevel_write(struct file *file, const char __user *buffer,
65 jfsloglevel = c - '0'; 56 jfsloglevel = c - '0';
66 return count; 57 return count;
67} 58}
59
60static const struct file_operations jfs_loglevel_proc_fops = {
61 .owner = THIS_MODULE,
62 .open = jfs_loglevel_proc_open,
63 .read = seq_read,
64 .llseek = seq_lseek,
65 .release = single_release,
66 .write = jfs_loglevel_proc_write,
67};
68#endif 68#endif
69 69
70static struct { 70static struct {
71 const char *name; 71 const char *name;
72 read_proc_t *read_fn; 72 const struct file_operations *proc_fops;
73 write_proc_t *write_fn;
74} Entries[] = { 73} Entries[] = {
75#ifdef CONFIG_JFS_STATISTICS 74#ifdef CONFIG_JFS_STATISTICS
76 { "lmstats", jfs_lmstats_read, }, 75 { "lmstats", &jfs_lmstats_proc_fops, },
77 { "txstats", jfs_txstats_read, }, 76 { "txstats", &jfs_txstats_proc_fops, },
78 { "xtstat", jfs_xtstat_read, }, 77 { "xtstat", &jfs_xtstat_proc_fops, },
79 { "mpstat", jfs_mpstat_read, }, 78 { "mpstat", &jfs_mpstat_proc_fops, },
80#endif 79#endif
81#ifdef CONFIG_JFS_DEBUG 80#ifdef CONFIG_JFS_DEBUG
82 { "TxAnchor", jfs_txanchor_read, }, 81 { "TxAnchor", &jfs_txanchor_proc_fops, },
83 { "loglevel", loglevel_read, loglevel_write } 82 { "loglevel", &jfs_loglevel_proc_fops }
84#endif 83#endif
85}; 84};
86#define NPROCENT ARRAY_SIZE(Entries) 85#define NPROCENT ARRAY_SIZE(Entries)
@@ -93,13 +92,8 @@ void jfs_proc_init(void)
93 return; 92 return;
94 base->owner = THIS_MODULE; 93 base->owner = THIS_MODULE;
95 94
96 for (i = 0; i < NPROCENT; i++) { 95 for (i = 0; i < NPROCENT; i++)
97 struct proc_dir_entry *p; 96 proc_create(Entries[i].name, 0, base, Entries[i].proc_fops);
98 if ((p = create_proc_entry(Entries[i].name, 0, base))) {
99 p->read_proc = Entries[i].read_fn;
100 p->write_proc = Entries[i].write_fn;
101 }
102 }
103} 97}
104 98
105void jfs_proc_clean(void) 99void jfs_proc_clean(void)
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index 044c1e654cc0..eafd1300a00b 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,7 @@ extern void jfs_proc_clean(void);
62 62
63extern int jfsloglevel; 63extern int jfsloglevel;
64 64
65extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); 65extern const struct file_operations jfs_txanchor_proc_fops;
66 66
67/* information message: e.g., configuration, major event */ 67/* information message: e.g., configuration, major event */
68#define jfs_info(fmt, arg...) do { \ 68#define jfs_info(fmt, arg...) do { \
@@ -105,10 +105,10 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
105 * ---------- 105 * ----------
106 */ 106 */
107#ifdef CONFIG_JFS_STATISTICS 107#ifdef CONFIG_JFS_STATISTICS
108extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *); 108extern const struct file_operations jfs_lmstats_proc_fops;
109extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *); 109extern const struct file_operations jfs_txstats_proc_fops;
110extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *); 110extern const struct file_operations jfs_mpstat_proc_fops;
111extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *); 111extern const struct file_operations jfs_xtstat_proc_fops;
112 112
113#define INCREMENT(x) ((x)++) 113#define INCREMENT(x) ((x)++)
114#define DECREMENT(x) ((x)--) 114#define DECREMENT(x) ((x)--)
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index cdac2d5bafeb..2545bb317235 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -243,9 +243,6 @@ typedef union {
243#define JFS_REMOVE 3 243#define JFS_REMOVE 3
244#define JFS_RENAME 4 244#define JFS_RENAME 4
245 245
246#define DIRENTSIZ(namlen) \
247 ( (sizeof(struct dirent) - 2*(JFS_NAME_MAX+1) + 2*((namlen)+1) + 3) &~ 3 )
248
249/* 246/*
250 * Maximum file offset for directories. 247 * Maximum file offset for directories.
251 */ 248 */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 734ec916beaf..d6363d8309d0 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -1520,7 +1520,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1520 jfs_error(ip->i_sb, 1520 jfs_error(ip->i_sb,
1521 "diAlloc: can't find free bit " 1521 "diAlloc: can't find free bit "
1522 "in wmap"); 1522 "in wmap");
1523 return EIO; 1523 return -EIO;
1524 } 1524 }
1525 1525
1526 /* determine the inode number within the 1526 /* determine the inode number within the
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 325a9679b95a..cd2ec2988b59 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -69,6 +69,7 @@
69#include <linux/freezer.h> 69#include <linux/freezer.h>
70#include <linux/delay.h> 70#include <linux/delay.h>
71#include <linux/mutex.h> 71#include <linux/mutex.h>
72#include <linux/seq_file.h>
72#include "jfs_incore.h" 73#include "jfs_incore.h"
73#include "jfs_filsys.h" 74#include "jfs_filsys.h"
74#include "jfs_metapage.h" 75#include "jfs_metapage.h"
@@ -2503,13 +2504,9 @@ exit:
2503} 2504}
2504 2505
2505#ifdef CONFIG_JFS_STATISTICS 2506#ifdef CONFIG_JFS_STATISTICS
2506int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, 2507static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2507 int *eof, void *data)
2508{ 2508{
2509 int len = 0; 2509 seq_printf(m,
2510 off_t begin;
2511
2512 len += sprintf(buffer,
2513 "JFS Logmgr stats\n" 2510 "JFS Logmgr stats\n"
2514 "================\n" 2511 "================\n"
2515 "commits = %d\n" 2512 "commits = %d\n"
@@ -2522,19 +2519,19 @@ int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
2522 lmStat.pagedone, 2519 lmStat.pagedone,
2523 lmStat.full_page, 2520 lmStat.full_page,
2524 lmStat.partial_page); 2521 lmStat.partial_page);
2522 return 0;
2523}
2525 2524
2526 begin = offset; 2525static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
2527 *start = buffer + begin; 2526{
2528 len -= begin; 2527 return single_open(file, jfs_lmstats_proc_show, NULL);
2529
2530 if (len > length)
2531 len = length;
2532 else
2533 *eof = 1;
2534
2535 if (len < 0)
2536 len = 0;
2537
2538 return len;
2539} 2528}
2529
2530const struct file_operations jfs_lmstats_proc_fops = {
2531 .owner = THIS_MODULE,
2532 .open = jfs_lmstats_proc_open,
2533 .read = seq_read,
2534 .llseek = seq_lseek,
2535 .release = single_release,
2536};
2540#endif /* CONFIG_JFS_STATISTICS */ 2537#endif /* CONFIG_JFS_STATISTICS */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index d1e64f2f2fcd..854ff0ec574f 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -19,10 +19,12 @@
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/module.h>
22#include <linux/bio.h> 23#include <linux/bio.h>
23#include <linux/init.h> 24#include <linux/init.h>
24#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
25#include <linux/mempool.h> 26#include <linux/mempool.h>
27#include <linux/seq_file.h>
26#include "jfs_incore.h" 28#include "jfs_incore.h"
27#include "jfs_superblock.h" 29#include "jfs_superblock.h"
28#include "jfs_filsys.h" 30#include "jfs_filsys.h"
@@ -804,13 +806,9 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
804} 806}
805 807
806#ifdef CONFIG_JFS_STATISTICS 808#ifdef CONFIG_JFS_STATISTICS
807int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, 809static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
808 int *eof, void *data)
809{ 810{
810 int len = 0; 811 seq_printf(m,
811 off_t begin;
812
813 len += sprintf(buffer,
814 "JFS Metapage statistics\n" 812 "JFS Metapage statistics\n"
815 "=======================\n" 813 "=======================\n"
816 "page allocations = %d\n" 814 "page allocations = %d\n"
@@ -819,19 +817,19 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
819 mpStat.pagealloc, 817 mpStat.pagealloc,
820 mpStat.pagefree, 818 mpStat.pagefree,
821 mpStat.lockwait); 819 mpStat.lockwait);
820 return 0;
821}
822 822
823 begin = offset; 823static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
824 *start = buffer + begin; 824{
825 len -= begin; 825 return single_open(file, jfs_mpstat_proc_show, NULL);
826
827 if (len > length)
828 len = length;
829 else
830 *eof = 1;
831
832 if (len < 0)
833 len = 0;
834
835 return len;
836} 826}
827
828const struct file_operations jfs_mpstat_proc_fops = {
829 .owner = THIS_MODULE,
830 .open = jfs_mpstat_proc_open,
831 .read = seq_read,
832 .llseek = seq_lseek,
833 .release = single_release,
834};
837#endif 835#endif
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index e7c60ae6b5b2..f26e4d03ada5 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -49,6 +49,7 @@
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/moduleparam.h> 50#include <linux/moduleparam.h>
51#include <linux/kthread.h> 51#include <linux/kthread.h>
52#include <linux/seq_file.h>
52#include "jfs_incore.h" 53#include "jfs_incore.h"
53#include "jfs_inode.h" 54#include "jfs_inode.h"
54#include "jfs_filsys.h" 55#include "jfs_filsys.h"
@@ -3009,11 +3010,8 @@ int jfs_sync(void *arg)
3009} 3010}
3010 3011
3011#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) 3012#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
3012int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, 3013static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
3013 int *eof, void *data)
3014{ 3014{
3015 int len = 0;
3016 off_t begin;
3017 char *freewait; 3015 char *freewait;
3018 char *freelockwait; 3016 char *freelockwait;
3019 char *lowlockwait; 3017 char *lowlockwait;
@@ -3025,7 +3023,7 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
3025 lowlockwait = 3023 lowlockwait =
3026 waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; 3024 waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";
3027 3025
3028 len += sprintf(buffer, 3026 seq_printf(m,
3029 "JFS TxAnchor\n" 3027 "JFS TxAnchor\n"
3030 "============\n" 3028 "============\n"
3031 "freetid = %d\n" 3029 "freetid = %d\n"
@@ -3044,31 +3042,27 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
3044 TxAnchor.tlocksInUse, 3042 TxAnchor.tlocksInUse,
3045 jfs_tlocks_low, 3043 jfs_tlocks_low,
3046 list_empty(&TxAnchor.unlock_queue) ? "" : "not "); 3044 list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
3045 return 0;
3046}
3047 3047
3048 begin = offset; 3048static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
3049 *start = buffer + begin; 3049{
3050 len -= begin; 3050 return single_open(file, jfs_txanchor_proc_show, NULL);
3051
3052 if (len > length)
3053 len = length;
3054 else
3055 *eof = 1;
3056
3057 if (len < 0)
3058 len = 0;
3059
3060 return len;
3061} 3051}
3052
3053const struct file_operations jfs_txanchor_proc_fops = {
3054 .owner = THIS_MODULE,
3055 .open = jfs_txanchor_proc_open,
3056 .read = seq_read,
3057 .llseek = seq_lseek,
3058 .release = single_release,
3059};
3062#endif 3060#endif
3063 3061
3064#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) 3062#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
3065int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, 3063static int jfs_txstats_proc_show(struct seq_file *m, void *v)
3066 int *eof, void *data)
3067{ 3064{
3068 int len = 0; 3065 seq_printf(m,
3069 off_t begin;
3070
3071 len += sprintf(buffer,
3072 "JFS TxStats\n" 3066 "JFS TxStats\n"
3073 "===========\n" 3067 "===========\n"
3074 "calls to txBegin = %d\n" 3068 "calls to txBegin = %d\n"
@@ -3089,19 +3083,19 @@ int jfs_txstats_read(char *buffer, char **start, off_t offset, int length,
3089 TxStat.txBeginAnon_lockslow, 3083 TxStat.txBeginAnon_lockslow,
3090 TxStat.txLockAlloc, 3084 TxStat.txLockAlloc,
3091 TxStat.txLockAlloc_freelock); 3085 TxStat.txLockAlloc_freelock);
3086 return 0;
3087}
3092 3088
3093 begin = offset; 3089static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
3094 *start = buffer + begin; 3090{
3095 len -= begin; 3091 return single_open(file, jfs_txstats_proc_show, NULL);
3096
3097 if (len > length)
3098 len = length;
3099 else
3100 *eof = 1;
3101
3102 if (len < 0)
3103 len = 0;
3104
3105 return len;
3106} 3092}
3093
3094const struct file_operations jfs_txstats_proc_fops = {
3095 .owner = THIS_MODULE,
3096 .open = jfs_txstats_proc_open,
3097 .read = seq_read,
3098 .llseek = seq_lseek,
3099 .release = single_release,
3100};
3107#endif 3101#endif
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 5a61ebf2cbcc..ae3acafb447b 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -20,7 +20,9 @@
20 */ 20 */
21 21
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/module.h>
23#include <linux/quotaops.h> 24#include <linux/quotaops.h>
25#include <linux/seq_file.h>
24#include "jfs_incore.h" 26#include "jfs_incore.h"
25#include "jfs_filsys.h" 27#include "jfs_filsys.h"
26#include "jfs_metapage.h" 28#include "jfs_metapage.h"
@@ -4134,13 +4136,9 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4134} 4136}
4135 4137
4136#ifdef CONFIG_JFS_STATISTICS 4138#ifdef CONFIG_JFS_STATISTICS
4137int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, 4139static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
4138 int *eof, void *data)
4139{ 4140{
4140 int len = 0; 4141 seq_printf(m,
4141 off_t begin;
4142
4143 len += sprintf(buffer,
4144 "JFS Xtree statistics\n" 4142 "JFS Xtree statistics\n"
4145 "====================\n" 4143 "====================\n"
4146 "searches = %d\n" 4144 "searches = %d\n"
@@ -4149,19 +4147,19 @@ int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
4149 xtStat.search, 4147 xtStat.search,
4150 xtStat.fastSearch, 4148 xtStat.fastSearch,
4151 xtStat.split); 4149 xtStat.split);
4150 return 0;
4151}
4152 4152
4153 begin = offset; 4153static int jfs_xtstat_proc_open(struct inode *inode, struct file *file)
4154 *start = buffer + begin; 4154{
4155 len -= begin; 4155 return single_open(file, jfs_xtstat_proc_show, NULL);
4156
4157 if (len > length)
4158 len = length;
4159 else
4160 *eof = 1;
4161
4162 if (len < 0)
4163 len = 0;
4164
4165 return len;
4166} 4156}
4157
4158const struct file_operations jfs_xtstat_proc_fops = {
4159 .owner = THIS_MODULE,
4160 .open = jfs_xtstat_proc_open,
4161 .read = seq_read,
4162 .llseek = seq_lseek,
4163 .release = single_release,
4164};
4167#endif 4165#endif
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 0ba6778edaa2..2aba82386810 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1455,7 +1455,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1455 free_UCSname(&key); 1455 free_UCSname(&key);
1456 if (rc == -ENOENT) { 1456 if (rc == -ENOENT) {
1457 d_add(dentry, NULL); 1457 d_add(dentry, NULL);
1458 return ERR_PTR(0); 1458 return NULL;
1459 } else if (rc) { 1459 } else if (rc) {
1460 jfs_err("jfs_lookup: dtSearch returned %d", rc); 1460 jfs_err("jfs_lookup: dtSearch returned %d", rc);
1461 return ERR_PTR(rc); 1461 return ERR_PTR(rc);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 50ea65451732..0288e6d7936a 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -499,7 +499,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
499 inode = jfs_iget(sb, ROOT_I); 499 inode = jfs_iget(sb, ROOT_I);
500 if (IS_ERR(inode)) { 500 if (IS_ERR(inode)) {
501 ret = PTR_ERR(inode); 501 ret = PTR_ERR(inode);
502 goto out_no_root; 502 goto out_no_rw;
503 } 503 }
504 sb->s_root = d_alloc_root(inode); 504 sb->s_root = d_alloc_root(inode);
505 if (!sb->s_root) 505 if (!sb->s_root)
@@ -521,9 +521,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
521 return 0; 521 return 0;
522 522
523out_no_root: 523out_no_root:
524 jfs_err("jfs_read_super: get root inode failed"); 524 jfs_err("jfs_read_super: get root dentry failed");
525 if (inode) 525 iput(inode);
526 iput(inode);
527 526
528out_no_rw: 527out_no_rw:
529 rc = jfs_umount(sb); 528 rc = jfs_umount(sb);
diff --git a/fs/mpage.c b/fs/mpage.c
index 235e4d3873a8..dbcc7af76a15 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -82,7 +82,7 @@ static void mpage_end_io_write(struct bio *bio, int err)
82 bio_put(bio); 82 bio_put(bio);
83} 83}
84 84
85static struct bio *mpage_bio_submit(int rw, struct bio *bio) 85struct bio *mpage_bio_submit(int rw, struct bio *bio)
86{ 86{
87 bio->bi_end_io = mpage_end_io_read; 87 bio->bi_end_io = mpage_end_io_read;
88 if (rw == WRITE) 88 if (rw == WRITE)
@@ -90,6 +90,7 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio)
90 submit_bio(rw, bio); 90 submit_bio(rw, bio);
91 return NULL; 91 return NULL;
92} 92}
93EXPORT_SYMBOL(mpage_bio_submit);
93 94
94static struct bio * 95static struct bio *
95mpage_alloc(struct block_device *bdev, 96mpage_alloc(struct block_device *bdev,
@@ -435,15 +436,9 @@ EXPORT_SYMBOL(mpage_readpage);
435 * written, so it can intelligently allocate a suitably-sized BIO. For now, 436 * written, so it can intelligently allocate a suitably-sized BIO. For now,
436 * just allocate full-size (16-page) BIOs. 437 * just allocate full-size (16-page) BIOs.
437 */ 438 */
438struct mpage_data {
439 struct bio *bio;
440 sector_t last_block_in_bio;
441 get_block_t *get_block;
442 unsigned use_writepage;
443};
444 439
445static int __mpage_writepage(struct page *page, struct writeback_control *wbc, 440int __mpage_writepage(struct page *page, struct writeback_control *wbc,
446 void *data) 441 void *data)
447{ 442{
448 struct mpage_data *mpd = data; 443 struct mpage_data *mpd = data;
449 struct bio *bio = mpd->bio; 444 struct bio *bio = mpd->bio;
@@ -651,6 +646,7 @@ out:
651 mpd->bio = bio; 646 mpd->bio = bio;
652 return ret; 647 return ret;
653} 648}
649EXPORT_SYMBOL(__mpage_writepage);
654 650
655/** 651/**
656 * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them 652 * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
diff --git a/include/asm-x86/dwarf2.h b/include/asm-x86/dwarf2.h
index fd4a6a0393ac..738bb9fb3e53 100644
--- a/include/asm-x86/dwarf2.h
+++ b/include/asm-x86/dwarf2.h
@@ -38,23 +38,23 @@
38 38
39/* Due to the structure of pre-exisiting code, don't use assembler line 39/* Due to the structure of pre-exisiting code, don't use assembler line
40 comment character # to ignore the arguments. Instead, use a dummy macro. */ 40 comment character # to ignore the arguments. Instead, use a dummy macro. */
41.macro __cfi_ignore a=0, b=0, c=0, d=0 41.macro cfi_ignore a=0, b=0, c=0, d=0
42.endm 42.endm
43 43
44#define CFI_STARTPROC __cfi_ignore 44#define CFI_STARTPROC cfi_ignore
45#define CFI_ENDPROC __cfi_ignore 45#define CFI_ENDPROC cfi_ignore
46#define CFI_DEF_CFA __cfi_ignore 46#define CFI_DEF_CFA cfi_ignore
47#define CFI_DEF_CFA_REGISTER __cfi_ignore 47#define CFI_DEF_CFA_REGISTER cfi_ignore
48#define CFI_DEF_CFA_OFFSET __cfi_ignore 48#define CFI_DEF_CFA_OFFSET cfi_ignore
49#define CFI_ADJUST_CFA_OFFSET __cfi_ignore 49#define CFI_ADJUST_CFA_OFFSET cfi_ignore
50#define CFI_OFFSET __cfi_ignore 50#define CFI_OFFSET cfi_ignore
51#define CFI_REL_OFFSET __cfi_ignore 51#define CFI_REL_OFFSET cfi_ignore
52#define CFI_REGISTER __cfi_ignore 52#define CFI_REGISTER cfi_ignore
53#define CFI_RESTORE __cfi_ignore 53#define CFI_RESTORE cfi_ignore
54#define CFI_REMEMBER_STATE __cfi_ignore 54#define CFI_REMEMBER_STATE cfi_ignore
55#define CFI_RESTORE_STATE __cfi_ignore 55#define CFI_RESTORE_STATE cfi_ignore
56#define CFI_UNDEFINED __cfi_ignore 56#define CFI_UNDEFINED cfi_ignore
57#define CFI_SIGNAL_FRAME __cfi_ignore 57#define CFI_SIGNAL_FRAME cfi_ignore
58 58
59#endif 59#endif
60 60
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 0764b662b339..1c1b13e29223 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1089,6 +1089,7 @@ extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size);
1089extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size); 1089extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size);
1090 1090
1091extern void drm_core_ioremap(struct drm_map *map, struct drm_device *dev); 1091extern void drm_core_ioremap(struct drm_map *map, struct drm_device *dev);
1092extern void drm_core_ioremap_wc(struct drm_map *map, struct drm_device *dev);
1092extern void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev); 1093extern void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev);
1093 1094
1094static __inline__ struct drm_map *drm_core_findmap(struct drm_device *dev, 1095static __inline__ struct drm_map *drm_core_findmap(struct drm_device *dev,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ffd8bfdc4c9..32a441b05fd5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -651,7 +651,6 @@ extern void generic_make_request(struct bio *bio);
651extern void blk_rq_init(struct request_queue *q, struct request *rq); 651extern void blk_rq_init(struct request_queue *q, struct request *rq);
652extern void blk_put_request(struct request *); 652extern void blk_put_request(struct request *);
653extern void __blk_put_request(struct request_queue *, struct request *); 653extern void __blk_put_request(struct request_queue *, struct request *);
654extern void blk_end_sync_rq(struct request *rq, int error);
655extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 654extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
656extern void blk_insert_request(struct request_queue *, struct request *, int, void *); 655extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
657extern void blk_requeue_request(struct request_queue *, struct request *); 656extern void blk_requeue_request(struct request_queue *, struct request *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index faac13e2cc5c..52e510a0aec2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1740,6 +1740,8 @@ extern int wait_on_page_writeback_range(struct address_space *mapping,
1740 pgoff_t start, pgoff_t end); 1740 pgoff_t start, pgoff_t end);
1741extern int __filemap_fdatawrite_range(struct address_space *mapping, 1741extern int __filemap_fdatawrite_range(struct address_space *mapping,
1742 loff_t start, loff_t end, int sync_mode); 1742 loff_t start, loff_t end, int sync_mode);
1743extern int filemap_fdatawrite_range(struct address_space *mapping,
1744 loff_t start, loff_t end);
1743 1745
1744extern long do_fsync(struct file *file, int datasync); 1746extern long do_fsync(struct file *file, int datasync);
1745extern void sync_supers(void); 1747extern void sync_supers(void);
diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h
index 77afbb60fd11..0177d280f733 100644
--- a/include/linux/i2c-algo-pcf.h
+++ b/include/linux/i2c-algo-pcf.h
@@ -33,9 +33,11 @@ struct i2c_algo_pcf_data {
33 int (*getclock) (void *data); 33 int (*getclock) (void *data);
34 void (*waitforpin) (void); 34 void (*waitforpin) (void);
35 35
36 /* local settings */ 36 /* Multi-master lost arbitration back-off delay (msecs)
37 int udelay; 37 * This should be set by the bus adapter or knowledgable client
38 int timeout; 38 * if bus is multi-mastered, else zero
39 */
40 unsigned long lab_mdelay;
39}; 41};
40 42
41int i2c_pcf_add_bus(struct i2c_adapter *); 43int i2c_pcf_add_bus(struct i2c_adapter *);
diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index 580acc93903e..ef13b7c66df3 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -91,8 +91,6 @@
91#define I2C_DRIVERID_M52790 95 /* Mitsubishi M52790SP/FP AV switch */ 91#define I2C_DRIVERID_M52790 95 /* Mitsubishi M52790SP/FP AV switch */
92#define I2C_DRIVERID_CS5345 96 /* cs5345 audio processor */ 92#define I2C_DRIVERID_CS5345 96 /* cs5345 audio processor */
93 93
94#define I2C_DRIVERID_I2CDEV 900
95
96#define I2C_DRIVERID_OV7670 1048 /* Omnivision 7670 camera */ 94#define I2C_DRIVERID_OV7670 1048 /* Omnivision 7670 camera */
97 95
98/* 96/*
@@ -111,7 +109,6 @@
111#define I2C_HW_B_RIVA 0x010010 /* Riva based graphics cards */ 109#define I2C_HW_B_RIVA 0x010010 /* Riva based graphics cards */
112#define I2C_HW_B_IOC 0x010011 /* IOC bit-wiggling */ 110#define I2C_HW_B_IOC 0x010011 /* IOC bit-wiggling */
113#define I2C_HW_B_IXP2000 0x010016 /* GPIO on IXP2000 systems */ 111#define I2C_HW_B_IXP2000 0x010016 /* GPIO on IXP2000 systems */
114#define I2C_HW_B_S3VIA 0x010018 /* S3Via ProSavage adapter */
115#define I2C_HW_B_ZR36067 0x010019 /* Zoran-36057/36067 based boards */ 112#define I2C_HW_B_ZR36067 0x010019 /* Zoran-36057/36067 based boards */
116#define I2C_HW_B_PCILYNX 0x01001a /* TI PCILynx I2C adapter */ 113#define I2C_HW_B_PCILYNX 0x01001a /* TI PCILynx I2C adapter */
117#define I2C_HW_B_CX2388x 0x01001b /* connexant 2388x based tv cards */ 114#define I2C_HW_B_CX2388x 0x01001b /* connexant 2388x based tv cards */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 8dc730132192..08be0d21864c 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -35,6 +35,8 @@
35#include <linux/sched.h> /* for completion */ 35#include <linux/sched.h> /* for completion */
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37 37
38extern struct bus_type i2c_bus_type;
39
38/* --- General options ------------------------------------------------ */ 40/* --- General options ------------------------------------------------ */
39 41
40struct i2c_msg; 42struct i2c_msg;
@@ -43,6 +45,7 @@ struct i2c_adapter;
43struct i2c_client; 45struct i2c_client;
44struct i2c_driver; 46struct i2c_driver;
45union i2c_smbus_data; 47union i2c_smbus_data;
48struct i2c_board_info;
46 49
47/* 50/*
48 * The master routines are the ones normally used to transmit data to devices 51 * The master routines are the ones normally used to transmit data to devices
@@ -69,9 +72,8 @@ extern s32 i2c_smbus_xfer (struct i2c_adapter * adapter, u16 addr,
69 union i2c_smbus_data * data); 72 union i2c_smbus_data * data);
70 73
71/* Now follow the 'nice' access routines. These also document the calling 74/* Now follow the 'nice' access routines. These also document the calling
72 conventions of smbus_access. */ 75 conventions of i2c_smbus_xfer. */
73 76
74extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value);
75extern s32 i2c_smbus_read_byte(struct i2c_client * client); 77extern s32 i2c_smbus_read_byte(struct i2c_client * client);
76extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value); 78extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value);
77extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command); 79extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command);
@@ -93,15 +95,33 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client,
93 u8 command, u8 length, 95 u8 command, u8 length,
94 const u8 *values); 96 const u8 *values);
95 97
96/* 98/**
97 * A driver is capable of handling one or more physical devices present on 99 * struct i2c_driver - represent an I2C device driver
98 * I2C adapters. This information is used to inform the driver of adapter 100 * @class: What kind of i2c device we instantiate (for detect)
99 * events. 101 * @detect: Callback for device detection
102 * @address_data: The I2C addresses to probe, ignore or force (for detect)
103 * @clients: List of detected clients we created (for i2c-core use only)
100 * 104 *
101 * The driver.owner field should be set to the module owner of this driver. 105 * The driver.owner field should be set to the module owner of this driver.
102 * The driver.name field should be set to the name of this driver. 106 * The driver.name field should be set to the name of this driver.
107 *
108 * For automatic device detection, both @detect and @address_data must
109 * be defined. @class should also be set, otherwise only devices forced
110 * with module parameters will be created. The detect function must
111 * fill at least the name field of the i2c_board_info structure it is
112 * handed upon successful detection, and possibly also the flags field.
113 *
114 * If @detect is missing, the driver will still work fine for enumerated
115 * devices. Detected devices simply won't be supported. This is expected
116 * for the many I2C/SMBus devices which can't be detected reliably, and
117 * the ones which can always be enumerated in practice.
118 *
119 * The i2c_client structure which is handed to the @detect callback is
120 * not a real i2c_client. It is initialized just enough so that you can
121 * call i2c_smbus_read_byte_data and friends on it. Don't do anything
122 * else with it. In particular, calling dev_dbg and friends on it is
123 * not allowed.
103 */ 124 */
104
105struct i2c_driver { 125struct i2c_driver {
106 int id; 126 int id;
107 unsigned int class; 127 unsigned int class;
@@ -141,6 +161,11 @@ struct i2c_driver {
141 161
142 struct device_driver driver; 162 struct device_driver driver;
143 const struct i2c_device_id *id_table; 163 const struct i2c_device_id *id_table;
164
165 /* Device detection callback for automatic device creation */
166 int (*detect)(struct i2c_client *, int kind, struct i2c_board_info *);
167 const struct i2c_client_address_data *address_data;
168 struct list_head clients;
144}; 169};
145#define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) 170#define to_i2c_driver(d) container_of(d, struct i2c_driver, driver)
146 171
@@ -156,6 +181,7 @@ struct i2c_driver {
156 * @dev: Driver model device node for the slave. 181 * @dev: Driver model device node for the slave.
157 * @irq: indicates the IRQ generated by this device (if any) 182 * @irq: indicates the IRQ generated by this device (if any)
158 * @list: list of active/busy clients (DEPRECATED) 183 * @list: list of active/busy clients (DEPRECATED)
184 * @detected: member of an i2c_driver.clients list
159 * @released: used to synchronize client releases & detaches and references 185 * @released: used to synchronize client releases & detaches and references
160 * 186 *
161 * An i2c_client identifies a single device (i.e. chip) connected to an 187 * An i2c_client identifies a single device (i.e. chip) connected to an
@@ -173,6 +199,7 @@ struct i2c_client {
173 struct device dev; /* the device structure */ 199 struct device dev; /* the device structure */
174 int irq; /* irq issued by device */ 200 int irq; /* irq issued by device */
175 struct list_head list; /* DEPRECATED */ 201 struct list_head list; /* DEPRECATED */
202 struct list_head detected;
176 struct completion released; 203 struct completion released;
177}; 204};
178#define to_i2c_client(d) container_of(d, struct i2c_client, dev) 205#define to_i2c_client(d) container_of(d, struct i2c_client, dev)
@@ -350,10 +377,11 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
350#define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ 377#define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */
351#define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */ 378#define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */
352#define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */ 379#define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */
353#define I2C_CLASS_DDC (1<<3) /* i2c-matroxfb ? */ 380#define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */
354#define I2C_CLASS_CAM_ANALOG (1<<4) /* camera with analog CCD */ 381#define I2C_CLASS_CAM_ANALOG (1<<4) /* camera with analog CCD */
355#define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ 382#define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */
356#define I2C_CLASS_SOUND (1<<6) /* sound devices */ 383#define I2C_CLASS_SOUND (1<<6) /* sound devices */
384#define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */
357#define I2C_CLASS_ALL (UINT_MAX) /* all of the above */ 385#define I2C_CLASS_ALL (UINT_MAX) /* all of the above */
358 386
359/* i2c_client_address_data is the struct for holding default client 387/* i2c_client_address_data is the struct for holding default client
@@ -537,7 +565,7 @@ union i2c_smbus_data {
537 /* and one more for user-space compatibility */ 565 /* and one more for user-space compatibility */
538}; 566};
539 567
540/* smbus_access read or write markers */ 568/* i2c_smbus_xfer read or write markers */
541#define I2C_SMBUS_READ 1 569#define I2C_SMBUS_READ 1
542#define I2C_SMBUS_WRITE 0 570#define I2C_SMBUS_WRITE 0
543 571
diff --git a/include/linux/i2c/at24.h b/include/linux/i2c/at24.h
new file mode 100644
index 000000000000..f6edd522a929
--- /dev/null
+++ b/include/linux/i2c/at24.h
@@ -0,0 +1,28 @@
1#ifndef _LINUX_AT24_H
2#define _LINUX_AT24_H
3
4#include <linux/types.h>
5
6/*
7 * As seen through Linux I2C, differences between the most common types of I2C
8 * memory include:
9 * - How much memory is available (usually specified in bit)?
10 * - What write page size does it support?
11 * - Special flags (16 bit addresses, read_only, world readable...)?
12 *
13 * If you set up a custom eeprom type, please double-check the parameters.
14 * Especially page_size needs extra care, as you risk data loss if your value
15 * is bigger than what the chip actually supports!
16 */
17
18struct at24_platform_data {
19 u32 byte_len; /* size (sum of all addr) */
20 u16 page_size; /* for writes */
21 u8 flags;
22#define AT24_FLAG_ADDR16 0x80 /* address pointer is 16 bit */
23#define AT24_FLAG_READONLY 0x40 /* sysfs-entry will be read-only */
24#define AT24_FLAG_IRUGO 0x20 /* sysfs-entry will be world-readable */
25#define AT24_FLAG_TAKE8ADDR 0x10 /* take always 8 addresses (24c00) */
26};
27
28#endif /* _LINUX_AT24_H */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index eddb6daadf4a..ac4eeb2932ef 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -364,7 +364,6 @@ typedef struct ide_drive_s {
364 u8 wcache; /* status of write cache */ 364 u8 wcache; /* status of write cache */
365 u8 acoustic; /* acoustic management */ 365 u8 acoustic; /* acoustic management */
366 u8 media; /* disk, cdrom, tape, floppy, ... */ 366 u8 media; /* disk, cdrom, tape, floppy, ... */
367 u8 ctl; /* "normal" value for Control register */
368 u8 ready_stat; /* min status value for drive ready */ 367 u8 ready_stat; /* min status value for drive ready */
369 u8 mult_count; /* current multiple sector setting */ 368 u8 mult_count; /* current multiple sector setting */
370 u8 mult_req; /* requested multiple sector setting */ 369 u8 mult_req; /* requested multiple sector setting */
@@ -493,7 +492,7 @@ typedef struct hwif_s {
493 void (*ide_dma_clear_irq)(ide_drive_t *drive); 492 void (*ide_dma_clear_irq)(ide_drive_t *drive);
494 493
495 void (*OUTB)(u8 addr, unsigned long port); 494 void (*OUTB)(u8 addr, unsigned long port);
496 void (*OUTBSYNC)(ide_drive_t *drive, u8 addr, unsigned long port); 495 void (*OUTBSYNC)(struct hwif_s *hwif, u8 addr, unsigned long port);
497 496
498 u8 (*INB)(unsigned long port); 497 u8 (*INB)(unsigned long port);
499 498
@@ -532,7 +531,6 @@ typedef struct hwif_s {
532 unsigned serialized : 1; /* serialized all channel operation */ 531 unsigned serialized : 1; /* serialized all channel operation */
533 unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */ 532 unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */
534 unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ 533 unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */
535 unsigned mmio : 1; /* host uses MMIO */
536 534
537 struct device gendev; 535 struct device gendev;
538 struct device *portdev; 536 struct device *portdev;
@@ -604,12 +602,13 @@ enum {
604 PC_FLAG_SUPPRESS_ERROR = (1 << 1), 602 PC_FLAG_SUPPRESS_ERROR = (1 << 1),
605 PC_FLAG_WAIT_FOR_DSC = (1 << 2), 603 PC_FLAG_WAIT_FOR_DSC = (1 << 2),
606 PC_FLAG_DMA_OK = (1 << 3), 604 PC_FLAG_DMA_OK = (1 << 3),
607 PC_FLAG_DMA_RECOMMENDED = (1 << 4), 605 PC_FLAG_DMA_IN_PROGRESS = (1 << 4),
608 PC_FLAG_DMA_IN_PROGRESS = (1 << 5), 606 PC_FLAG_DMA_ERROR = (1 << 5),
609 PC_FLAG_DMA_ERROR = (1 << 6), 607 PC_FLAG_WRITING = (1 << 6),
610 PC_FLAG_WRITING = (1 << 7),
611 /* command timed out */ 608 /* command timed out */
612 PC_FLAG_TIMEDOUT = (1 << 8), 609 PC_FLAG_TIMEDOUT = (1 << 7),
610 PC_FLAG_ZIP_DRIVE = (1 << 8),
611 PC_FLAG_DRQ_INTERRUPT = (1 << 9),
613}; 612};
614 613
615struct ide_atapi_pc { 614struct ide_atapi_pc {
@@ -642,8 +641,8 @@ struct ide_atapi_pc {
642 * to change/removal later. 641 * to change/removal later.
643 */ 642 */
644 u8 pc_buf[256]; 643 u8 pc_buf[256];
645 void (*idefloppy_callback) (ide_drive_t *); 644
646 ide_startstop_t (*idetape_callback) (ide_drive_t *); 645 void (*callback)(ide_drive_t *);
647 646
648 /* idetape only */ 647 /* idetape only */
649 struct idetape_bh *bh; 648 struct idetape_bh *bh;
@@ -813,10 +812,6 @@ int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsig
813#ifndef _IDE_C 812#ifndef _IDE_C
814extern ide_hwif_t ide_hwifs[]; /* master data repository */ 813extern ide_hwif_t ide_hwifs[]; /* master data repository */
815#endif 814#endif
816extern int ide_noacpi;
817extern int ide_acpigtf;
818extern int ide_acpionboot;
819extern int noautodma;
820 815
821extern int ide_vlb_clk; 816extern int ide_vlb_clk;
822extern int ide_pci_clk; 817extern int ide_pci_clk;
@@ -857,25 +852,12 @@ int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long);
857 852
858extern ide_startstop_t ide_do_reset (ide_drive_t *); 853extern ide_startstop_t ide_do_reset (ide_drive_t *);
859 854
860extern void ide_init_drive_cmd (struct request *rq); 855extern void ide_do_drive_cmd(ide_drive_t *, struct request *);
861
862/*
863 * "action" parameter type for ide_do_drive_cmd() below.
864 */
865typedef enum {
866 ide_wait, /* insert rq at end of list, and wait for it */
867 ide_preempt, /* insert rq in front of current request */
868 ide_head_wait, /* insert rq in front of current request and wait for it */
869 ide_end /* insert rq at end of list, but don't wait for it */
870} ide_action_t;
871
872extern int ide_do_drive_cmd(ide_drive_t *, struct request *, ide_action_t);
873 856
874extern void ide_end_drive_cmd(ide_drive_t *, u8, u8); 857extern void ide_end_drive_cmd(ide_drive_t *, u8, u8);
875 858
876enum { 859enum {
877 IDE_TFLAG_LBA48 = (1 << 0), 860 IDE_TFLAG_LBA48 = (1 << 0),
878 IDE_TFLAG_NO_SELECT_MASK = (1 << 1),
879 IDE_TFLAG_FLAGGED = (1 << 2), 861 IDE_TFLAG_FLAGGED = (1 << 2),
880 IDE_TFLAG_OUT_DATA = (1 << 3), 862 IDE_TFLAG_OUT_DATA = (1 << 3),
881 IDE_TFLAG_OUT_HOB_FEATURE = (1 << 4), 863 IDE_TFLAG_OUT_HOB_FEATURE = (1 << 4),
@@ -980,11 +962,23 @@ typedef struct ide_task_s {
980void ide_tf_dump(const char *, struct ide_taskfile *); 962void ide_tf_dump(const char *, struct ide_taskfile *);
981 963
982extern void SELECT_DRIVE(ide_drive_t *); 964extern void SELECT_DRIVE(ide_drive_t *);
965void SELECT_MASK(ide_drive_t *, int);
983 966
984extern int drive_is_ready(ide_drive_t *); 967extern int drive_is_ready(ide_drive_t *);
985 968
986void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); 969void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8);
987 970
971ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc,
972 ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry,
973 void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *),
974 void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *),
975 void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int,
976 int));
977ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *,
978 ide_handler_t *, unsigned int, ide_expiry_t *);
979ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_atapi_pc *,
980 ide_handler_t *, unsigned int, ide_expiry_t *);
981
988ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *); 982ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *);
989 983
990void task_end_request(ide_drive_t *, struct request *, u8); 984void task_end_request(ide_drive_t *, struct request *, u8);
@@ -996,8 +990,6 @@ int ide_taskfile_ioctl(ide_drive_t *, unsigned int, unsigned long);
996int ide_cmd_ioctl(ide_drive_t *, unsigned int, unsigned long); 990int ide_cmd_ioctl(ide_drive_t *, unsigned int, unsigned long);
997int ide_task_ioctl(ide_drive_t *, unsigned int, unsigned long); 991int ide_task_ioctl(ide_drive_t *, unsigned int, unsigned long);
998 992
999extern int system_bus_clock(void);
1000
1001extern int ide_driveid_update(ide_drive_t *); 993extern int ide_driveid_update(ide_drive_t *);
1002extern int ide_config_drive_speed(ide_drive_t *, u8); 994extern int ide_config_drive_speed(ide_drive_t *, u8);
1003extern u8 eighty_ninty_three (ide_drive_t *); 995extern u8 eighty_ninty_three (ide_drive_t *);
@@ -1349,7 +1341,8 @@ static inline void ide_set_irq(ide_drive_t *drive, int on)
1349{ 1341{
1350 ide_hwif_t *hwif = drive->hwif; 1342 ide_hwif_t *hwif = drive->hwif;
1351 1343
1352 hwif->OUTB(drive->ctl | (on ? 0 : 2), hwif->io_ports.ctl_addr); 1344 hwif->OUTBSYNC(hwif, ATA_DEVCTL_OBS | (on ? 0 : 2),
1345 hwif->io_ports.ctl_addr);
1353} 1346}
1354 1347
1355static inline u8 ide_read_status(ide_drive_t *drive) 1348static inline u8 ide_read_status(ide_drive_t *drive)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a86186dd0474..62aa4f895abe 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -104,8 +104,11 @@ extern void enable_irq(unsigned int irq);
104 104
105#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) 105#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
106 106
107extern cpumask_t irq_default_affinity;
108
107extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); 109extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
108extern int irq_can_set_affinity(unsigned int irq); 110extern int irq_can_set_affinity(unsigned int irq);
111extern int irq_select_affinity(unsigned int irq);
109 112
110#else /* CONFIG_SMP */ 113#else /* CONFIG_SMP */
111 114
@@ -119,6 +122,8 @@ static inline int irq_can_set_affinity(unsigned int irq)
119 return 0; 122 return 0;
120} 123}
121 124
125static inline int irq_select_affinity(unsigned int irq) { return 0; }
126
122#endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */ 127#endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
123 128
124#ifdef CONFIG_GENERIC_HARDIRQS 129#ifdef CONFIG_GENERIC_HARDIRQS
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 552e0ec269c9..8ccb462ea42c 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -244,15 +244,6 @@ static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
244} 244}
245#endif 245#endif
246 246
247#ifdef CONFIG_AUTO_IRQ_AFFINITY
248extern int select_smp_affinity(unsigned int irq);
249#else
250static inline int select_smp_affinity(unsigned int irq)
251{
252 return 1;
253}
254#endif
255
256extern int no_irq_affinity; 247extern int no_irq_affinity;
257 248
258static inline int irq_balancing_disabled(unsigned int irq) 249static inline int irq_balancing_disabled(unsigned int irq)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index d147f0f90360..3dd209007098 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -168,6 +168,8 @@ struct commit_header {
168 unsigned char h_chksum_size; 168 unsigned char h_chksum_size;
169 unsigned char h_padding[2]; 169 unsigned char h_padding[2];
170 __be32 h_chksum[JBD2_CHECKSUM_BYTES]; 170 __be32 h_chksum[JBD2_CHECKSUM_BYTES];
171 __be64 h_commit_sec;
172 __be32 h_commit_nsec;
171}; 173};
172 174
173/* 175/*
@@ -379,6 +381,38 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
379 bit_spin_unlock(BH_JournalHead, &bh->b_state); 381 bit_spin_unlock(BH_JournalHead, &bh->b_state);
380} 382}
381 383
384/* Flags in jbd_inode->i_flags */
385#define __JI_COMMIT_RUNNING 0
386/* Commit of the inode data in progress. We use this flag to protect us from
387 * concurrent deletion of inode. We cannot use reference to inode for this
388 * since we cannot afford doing last iput() on behalf of kjournald
389 */
390#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
391
392/**
393 * struct jbd_inode is the structure linking inodes in ordered mode
394 * present in a transaction so that we can sync them during commit.
395 */
396struct jbd2_inode {
397 /* Which transaction does this inode belong to? Either the running
398 * transaction or the committing one. [j_list_lock] */
399 transaction_t *i_transaction;
400
401 /* Pointer to the running transaction modifying inode's data in case
402 * there is already a committing transaction touching it. [j_list_lock] */
403 transaction_t *i_next_transaction;
404
405 /* List of inodes in the i_transaction [j_list_lock] */
406 struct list_head i_list;
407
408 /* VFS inode this inode belongs to [constant during the lifetime
409 * of the structure] */
410 struct inode *i_vfs_inode;
411
412 /* Flags of inode [j_list_lock] */
413 unsigned int i_flags;
414};
415
382struct jbd2_revoke_table_s; 416struct jbd2_revoke_table_s;
383 417
384/** 418/**
@@ -509,24 +543,12 @@ struct transaction_s
509 struct journal_head *t_reserved_list; 543 struct journal_head *t_reserved_list;
510 544
511 /* 545 /*
512 * Doubly-linked circular list of all buffers under writeout during
513 * commit [j_list_lock]
514 */
515 struct journal_head *t_locked_list;
516
517 /*
518 * Doubly-linked circular list of all metadata buffers owned by this 546 * Doubly-linked circular list of all metadata buffers owned by this
519 * transaction [j_list_lock] 547 * transaction [j_list_lock]
520 */ 548 */
521 struct journal_head *t_buffers; 549 struct journal_head *t_buffers;
522 550
523 /* 551 /*
524 * Doubly-linked circular list of all data buffers still to be
525 * flushed before this transaction can be committed [j_list_lock]
526 */
527 struct journal_head *t_sync_datalist;
528
529 /*
530 * Doubly-linked circular list of all forget buffers (superseded 552 * Doubly-linked circular list of all forget buffers (superseded
531 * buffers which we can un-checkpoint once this transaction commits) 553 * buffers which we can un-checkpoint once this transaction commits)
532 * [j_list_lock] 554 * [j_list_lock]
@@ -565,6 +587,12 @@ struct transaction_s
565 struct journal_head *t_log_list; 587 struct journal_head *t_log_list;
566 588
567 /* 589 /*
590 * List of inodes whose data we've modified in data=ordered mode.
591 * [j_list_lock]
592 */
593 struct list_head t_inode_list;
594
595 /*
568 * Protects info related to handles 596 * Protects info related to handles
569 */ 597 */
570 spinlock_t t_handle_lock; 598 spinlock_t t_handle_lock;
@@ -1004,7 +1032,6 @@ extern int jbd2_journal_extend (handle_t *, int nblocks);
1004extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); 1032extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
1005extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); 1033extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
1006extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); 1034extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
1007extern int jbd2_journal_dirty_data (handle_t *, struct buffer_head *);
1008extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); 1035extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
1009extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *); 1036extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
1010extern int jbd2_journal_forget (handle_t *, struct buffer_head *); 1037extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
@@ -1044,6 +1071,10 @@ extern void jbd2_journal_ack_err (journal_t *);
1044extern int jbd2_journal_clear_err (journal_t *); 1071extern int jbd2_journal_clear_err (journal_t *);
1045extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); 1072extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
1046extern int jbd2_journal_force_commit(journal_t *); 1073extern int jbd2_journal_force_commit(journal_t *);
1074extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
1075extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
1076extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
1077extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
1047 1078
1048/* 1079/*
1049 * journal_head management 1080 * journal_head management
@@ -1179,15 +1210,13 @@ static inline int jbd_space_needed(journal_t *journal)
1179 1210
1180/* journaling buffer types */ 1211/* journaling buffer types */
1181#define BJ_None 0 /* Not journaled */ 1212#define BJ_None 0 /* Not journaled */
1182#define BJ_SyncData 1 /* Normal data: flush before commit */ 1213#define BJ_Metadata 1 /* Normal journaled metadata */
1183#define BJ_Metadata 2 /* Normal journaled metadata */ 1214#define BJ_Forget 2 /* Buffer superseded by this transaction */
1184#define BJ_Forget 3 /* Buffer superseded by this transaction */ 1215#define BJ_IO 3 /* Buffer is for temporary IO use */
1185#define BJ_IO 4 /* Buffer is for temporary IO use */ 1216#define BJ_Shadow 4 /* Buffer contents being shadowed to the log */
1186#define BJ_Shadow 5 /* Buffer contents being shadowed to the log */ 1217#define BJ_LogCtl 5 /* Buffer contains log descriptors */
1187#define BJ_LogCtl 6 /* Buffer contains log descriptors */ 1218#define BJ_Reserved 6 /* Buffer is reserved for access by journal */
1188#define BJ_Reserved 7 /* Buffer is reserved for access by journal */ 1219#define BJ_Types 7
1189#define BJ_Locked 8 /* Locked for I/O during commit */
1190#define BJ_Types 9
1191 1220
1192extern int jbd_blocks_per_page(struct inode *inode); 1221extern int jbd_blocks_per_page(struct inode *inode);
1193 1222
diff --git a/include/linux/libata.h b/include/linux/libata.h
index e57e5d08312d..5b247b8a6b3b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -27,6 +27,7 @@
27#define __LINUX_LIBATA_H__ 27#define __LINUX_LIBATA_H__
28 28
29#include <linux/delay.h> 29#include <linux/delay.h>
30#include <linux/jiffies.h>
30#include <linux/interrupt.h> 31#include <linux/interrupt.h>
31#include <linux/dma-mapping.h> 32#include <linux/dma-mapping.h>
32#include <linux/scatterlist.h> 33#include <linux/scatterlist.h>
@@ -115,7 +116,7 @@ enum {
115 /* tag ATA_MAX_QUEUE - 1 is reserved for internal commands */ 116 /* tag ATA_MAX_QUEUE - 1 is reserved for internal commands */
116 ATA_MAX_QUEUE = 32, 117 ATA_MAX_QUEUE = 32,
117 ATA_TAG_INTERNAL = ATA_MAX_QUEUE - 1, 118 ATA_TAG_INTERNAL = ATA_MAX_QUEUE - 1,
118 ATA_SHORT_PAUSE = (HZ >> 6) + 1, 119 ATA_SHORT_PAUSE = 16,
119 120
120 ATAPI_MAX_DRAIN = 16 << 10, 121 ATAPI_MAX_DRAIN = 16 << 10,
121 122
@@ -168,6 +169,7 @@ enum {
168 ATA_LFLAG_ASSUME_CLASS = ATA_LFLAG_ASSUME_ATA | ATA_LFLAG_ASSUME_SEMB, 169 ATA_LFLAG_ASSUME_CLASS = ATA_LFLAG_ASSUME_ATA | ATA_LFLAG_ASSUME_SEMB,
169 ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */ 170 ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */
170 ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */ 171 ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */
172 ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */
171 173
172 /* struct ata_port flags */ 174 /* struct ata_port flags */
173 ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ 175 ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */
@@ -190,6 +192,10 @@ enum {
190 ATA_FLAG_AN = (1 << 18), /* controller supports AN */ 192 ATA_FLAG_AN = (1 << 18), /* controller supports AN */
191 ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */ 193 ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */
192 ATA_FLAG_IPM = (1 << 20), /* driver can handle IPM */ 194 ATA_FLAG_IPM = (1 << 20), /* driver can handle IPM */
195 ATA_FLAG_EM = (1 << 21), /* driver supports enclosure
196 * management */
197 ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity
198 * led */
193 199
194 /* The following flag belongs to ap->pflags but is kept in 200 /* The following flag belongs to ap->pflags but is kept in
195 * ap->flags because it's referenced in many LLDs and will be 201 * ap->flags because it's referenced in many LLDs and will be
@@ -234,17 +240,16 @@ enum {
234 /* bits 24:31 of host->flags are reserved for LLD specific flags */ 240 /* bits 24:31 of host->flags are reserved for LLD specific flags */
235 241
236 /* various lengths of time */ 242 /* various lengths of time */
237 ATA_TMOUT_BOOT = 30 * HZ, /* heuristic */ 243 ATA_TMOUT_BOOT = 30000, /* heuristic */
238 ATA_TMOUT_BOOT_QUICK = 7 * HZ, /* heuristic */ 244 ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */
239 ATA_TMOUT_INTERNAL = 30 * HZ, 245 ATA_TMOUT_INTERNAL_QUICK = 5000,
240 ATA_TMOUT_INTERNAL_QUICK = 5 * HZ,
241 246
242 /* FIXME: GoVault needs 2s but we can't afford that without 247 /* FIXME: GoVault needs 2s but we can't afford that without
243 * parallel probing. 800ms is enough for iVDR disk 248 * parallel probing. 800ms is enough for iVDR disk
244 * HHD424020F7SV00. Increase to 2secs when parallel probing 249 * HHD424020F7SV00. Increase to 2secs when parallel probing
245 * is in place. 250 * is in place.
246 */ 251 */
247 ATA_TMOUT_FF_WAIT = 4 * HZ / 5, 252 ATA_TMOUT_FF_WAIT = 800,
248 253
249 /* Spec mandates to wait for ">= 2ms" before checking status 254 /* Spec mandates to wait for ">= 2ms" before checking status
250 * after reset. We wait 150ms, because that was the magic 255 * after reset. We wait 150ms, because that was the magic
@@ -256,14 +261,14 @@ enum {
256 * 261 *
257 * Old drivers/ide uses the 2mS rule and then waits for ready. 262 * Old drivers/ide uses the 2mS rule and then waits for ready.
258 */ 263 */
259 ATA_WAIT_AFTER_RESET_MSECS = 150, 264 ATA_WAIT_AFTER_RESET = 150,
260 265
261 /* If PMP is supported, we have to do follow-up SRST. As some 266 /* If PMP is supported, we have to do follow-up SRST. As some
262 * PMPs don't send D2H Reg FIS after hardreset, LLDs are 267 * PMPs don't send D2H Reg FIS after hardreset, LLDs are
263 * advised to wait only for the following duration before 268 * advised to wait only for the following duration before
264 * doing SRST. 269 * doing SRST.
265 */ 270 */
266 ATA_TMOUT_PMP_SRST_WAIT = 1 * HZ, 271 ATA_TMOUT_PMP_SRST_WAIT = 1000,
267 272
268 /* ATA bus states */ 273 /* ATA bus states */
269 BUS_UNKNOWN = 0, 274 BUS_UNKNOWN = 0,
@@ -340,6 +345,11 @@ enum {
340 345
341 SATA_PMP_RW_TIMEOUT = 3000, /* PMP read/write timeout */ 346 SATA_PMP_RW_TIMEOUT = 3000, /* PMP read/write timeout */
342 347
348 /* This should match the actual table size of
349 * ata_eh_cmd_timeout_table in libata-eh.c.
350 */
351 ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 5,
352
343 /* Horkage types. May be set by libata or controller on drives 353 /* Horkage types. May be set by libata or controller on drives
344 (some horkage may be drive/controller pair dependant */ 354 (some horkage may be drive/controller pair dependant */
345 355
@@ -441,6 +451,15 @@ enum link_pm {
441 MEDIUM_POWER, 451 MEDIUM_POWER,
442}; 452};
443extern struct device_attribute dev_attr_link_power_management_policy; 453extern struct device_attribute dev_attr_link_power_management_policy;
454extern struct device_attribute dev_attr_em_message_type;
455extern struct device_attribute dev_attr_em_message;
456extern struct device_attribute dev_attr_sw_activity;
457
458enum sw_activity {
459 OFF,
460 BLINK_ON,
461 BLINK_OFF,
462};
444 463
445#ifdef CONFIG_ATA_SFF 464#ifdef CONFIG_ATA_SFF
446struct ata_ioports { 465struct ata_ioports {
@@ -597,10 +616,14 @@ struct ata_eh_info {
597struct ata_eh_context { 616struct ata_eh_context {
598 struct ata_eh_info i; 617 struct ata_eh_info i;
599 int tries[ATA_MAX_DEVICES]; 618 int tries[ATA_MAX_DEVICES];
619 int cmd_timeout_idx[ATA_MAX_DEVICES]
620 [ATA_EH_CMD_TIMEOUT_TABLE_SIZE];
600 unsigned int classes[ATA_MAX_DEVICES]; 621 unsigned int classes[ATA_MAX_DEVICES];
601 unsigned int did_probe_mask; 622 unsigned int did_probe_mask;
602 unsigned int saved_ncq_enabled; 623 unsigned int saved_ncq_enabled;
603 u8 saved_xfer_mode[ATA_MAX_DEVICES]; 624 u8 saved_xfer_mode[ATA_MAX_DEVICES];
625 /* timestamp for the last reset attempt or success */
626 unsigned long last_reset;
604}; 627};
605 628
606struct ata_acpi_drive 629struct ata_acpi_drive
@@ -692,6 +715,7 @@ struct ata_port {
692 struct timer_list fastdrain_timer; 715 struct timer_list fastdrain_timer;
693 unsigned long fastdrain_cnt; 716 unsigned long fastdrain_cnt;
694 717
718 int em_message_type;
695 void *private_data; 719 void *private_data;
696 720
697#ifdef CONFIG_ATA_ACPI 721#ifdef CONFIG_ATA_ACPI
@@ -783,6 +807,12 @@ struct ata_port_operations {
783 u8 (*bmdma_status)(struct ata_port *ap); 807 u8 (*bmdma_status)(struct ata_port *ap);
784#endif /* CONFIG_ATA_SFF */ 808#endif /* CONFIG_ATA_SFF */
785 809
810 ssize_t (*em_show)(struct ata_port *ap, char *buf);
811 ssize_t (*em_store)(struct ata_port *ap, const char *message,
812 size_t size);
813 ssize_t (*sw_activity_show)(struct ata_device *dev, char *buf);
814 ssize_t (*sw_activity_store)(struct ata_device *dev,
815 enum sw_activity val);
786 /* 816 /*
787 * Obsolete 817 * Obsolete
788 */ 818 */
@@ -895,8 +925,7 @@ extern void ata_host_resume(struct ata_host *host);
895#endif 925#endif
896extern int ata_ratelimit(void); 926extern int ata_ratelimit(void);
897extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, 927extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
898 unsigned long interval_msec, 928 unsigned long interval, unsigned long timeout);
899 unsigned long timeout_msec);
900extern int atapi_cmd_type(u8 opcode); 929extern int atapi_cmd_type(u8 opcode);
901extern void ata_tf_to_fis(const struct ata_taskfile *tf, 930extern void ata_tf_to_fis(const struct ata_taskfile *tf,
902 u8 pmp, int is_cmd, u8 *fis); 931 u8 pmp, int is_cmd, u8 *fis);
@@ -1389,6 +1418,12 @@ static inline int ata_check_ready(u8 status)
1389 return 0; 1418 return 0;
1390} 1419}
1391 1420
1421static inline unsigned long ata_deadline(unsigned long from_jiffies,
1422 unsigned long timeout_msecs)
1423{
1424 return from_jiffies + msecs_to_jiffies(timeout_msecs);
1425}
1426
1392 1427
1393/************************************************************************** 1428/**************************************************************************
1394 * PMP - drivers/ata/libata-pmp.c 1429 * PMP - drivers/ata/libata-pmp.c
diff --git a/include/linux/lm_interface.h b/include/linux/lm_interface.h
index f274997bc283..2ed8fa1b762b 100644
--- a/include/linux/lm_interface.h
+++ b/include/linux/lm_interface.h
@@ -122,11 +122,9 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data);
122 */ 122 */
123 123
124#define LM_OUT_ST_MASK 0x00000003 124#define LM_OUT_ST_MASK 0x00000003
125#define LM_OUT_CACHEABLE 0x00000004
126#define LM_OUT_CANCELED 0x00000008 125#define LM_OUT_CANCELED 0x00000008
127#define LM_OUT_ASYNC 0x00000080 126#define LM_OUT_ASYNC 0x00000080
128#define LM_OUT_ERROR 0x00000100 127#define LM_OUT_ERROR 0x00000100
129#define LM_OUT_CONV_DEADLK 0x00000200
130 128
131/* 129/*
132 * lm_callback_t types 130 * lm_callback_t types
@@ -138,9 +136,6 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data);
138 * LM_CB_NEED_RECOVERY 136 * LM_CB_NEED_RECOVERY
139 * The given journal needs to be recovered. 137 * The given journal needs to be recovered.
140 * 138 *
141 * LM_CB_DROPLOCKS
142 * Reduce the number of cached locks.
143 *
144 * LM_CB_ASYNC 139 * LM_CB_ASYNC
145 * The given lock has been granted. 140 * The given lock has been granted.
146 */ 141 */
@@ -149,7 +144,6 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data);
149#define LM_CB_NEED_D 258 144#define LM_CB_NEED_D 258
150#define LM_CB_NEED_S 259 145#define LM_CB_NEED_S 259
151#define LM_CB_NEED_RECOVERY 260 146#define LM_CB_NEED_RECOVERY 260
152#define LM_CB_DROPLOCKS 261
153#define LM_CB_ASYNC 262 147#define LM_CB_ASYNC 262
154 148
155/* 149/*
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index a744383d16e9..81b3dd5206e0 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -398,7 +398,8 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm
398int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); 398int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
399int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); 399int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
400 400
401int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); 401int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
402 int block_mcast_loopback);
402int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); 403int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
403 404
404int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, 405int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index 068a0c9946af..5c42821da2d1 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -11,11 +11,21 @@
11 */ 11 */
12#ifdef CONFIG_BLOCK 12#ifdef CONFIG_BLOCK
13 13
14struct mpage_data {
15 struct bio *bio;
16 sector_t last_block_in_bio;
17 get_block_t *get_block;
18 unsigned use_writepage;
19};
20
14struct writeback_control; 21struct writeback_control;
15 22
23struct bio *mpage_bio_submit(int rw, struct bio *bio);
16int mpage_readpages(struct address_space *mapping, struct list_head *pages, 24int mpage_readpages(struct address_space *mapping, struct list_head *pages,
17 unsigned nr_pages, get_block_t get_block); 25 unsigned nr_pages, get_block_t get_block);
18int mpage_readpage(struct page *page, get_block_t get_block); 26int mpage_readpage(struct page *page, get_block_t get_block);
27int __mpage_writepage(struct page *page, struct writeback_control *wbc,
28 void *data);
19int mpage_writepages(struct address_space *mapping, 29int mpage_writepages(struct address_space *mapping,
20 struct writeback_control *wbc, get_block_t get_block); 30 struct writeback_control *wbc, get_block_t get_block);
21int mpage_writepage(struct page *page, get_block_t *get_block, 31int mpage_writepage(struct page *page, get_block_t *get_block,
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccdfc112..208388835357 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
35void percpu_counter_destroy(struct percpu_counter *fbc); 35void percpu_counter_destroy(struct percpu_counter *fbc);
36void percpu_counter_set(struct percpu_counter *fbc, s64 amount); 36void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
37void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); 37void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
38s64 __percpu_counter_sum(struct percpu_counter *fbc); 38s64 __percpu_counter_sum(struct percpu_counter *fbc, int set);
39 39
40static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) 40static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
41{ 41{
@@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
44 44
45static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) 45static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
46{ 46{
47 s64 ret = __percpu_counter_sum(fbc); 47 s64 ret = __percpu_counter_sum(fbc, 0);
48 return ret < 0 ? 0 : ret; 48 return ret < 0 ? 0 : ret;
49} 49}
50 50
51static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
52{
53 return __percpu_counter_sum(fbc, 1);
54}
55
56
51static inline s64 percpu_counter_sum(struct percpu_counter *fbc) 57static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
52{ 58{
53 return __percpu_counter_sum(fbc); 59 return __percpu_counter_sum(fbc, 0);
54} 60}
55 61
56static inline s64 percpu_counter_read(struct percpu_counter *fbc) 62static inline s64 percpu_counter_read(struct percpu_counter *fbc)
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 24f3d2282e11..2158fc0d5a56 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -179,4 +179,17 @@ void arch_update_cpu_topology(void);
179#endif 179#endif
180#endif /* CONFIG_NUMA */ 180#endif /* CONFIG_NUMA */
181 181
182#ifndef topology_physical_package_id
183#define topology_physical_package_id(cpu) ((void)(cpu), -1)
184#endif
185#ifndef topology_core_id
186#define topology_core_id(cpu) ((void)(cpu), 0)
187#endif
188#ifndef topology_thread_siblings
189#define topology_thread_siblings(cpu) cpumask_of_cpu(cpu)
190#endif
191#ifndef topology_core_siblings
192#define topology_core_siblings(cpu) cpumask_of_cpu(cpu)
193#endif
194
182#endif /* _LINUX_TOPOLOGY_H */ 195#endif /* _LINUX_TOPOLOGY_H */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index bd91987c065f..12b15c561a1f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -63,6 +63,7 @@ struct writeback_control {
63 unsigned for_writepages:1; /* This is a writepages() call */ 63 unsigned for_writepages:1; /* This is a writepages() call */
64 unsigned range_cyclic:1; /* range_start is cyclic */ 64 unsigned range_cyclic:1; /* range_start is cyclic */
65 unsigned more_io:1; /* more io to be dispatched */ 65 unsigned more_io:1; /* more io to be dispatched */
66 unsigned range_cont:1;
66}; 67};
67 68
68/* 69/*
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index c36750ff6ae8..483057b2f4b4 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -2,29 +2,33 @@
2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * 4 *
5 * This Software is licensed under one of the following licenses: 5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
6 * 10 *
7 * 1) under the terms of the "Common Public License 1.0" a copy of which is 11 * Redistribution and use in source and binary forms, with or
8 * available from the Open Source Initiative, see 12 * without modification, are permitted provided that the following
9 * http://www.opensource.org/licenses/cpl.php. 13 * conditions are met:
10 * 14 *
11 * 2) under the terms of the "The BSD License" a copy of which is 15 * - Redistributions of source code must retain the above
12 * available from the Open Source Initiative, see 16 * copyright notice, this list of conditions and the following
13 * http://www.opensource.org/licenses/bsd-license.php. 17 * disclaimer.
14 * 18 *
15 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a 19 * - Redistributions in binary form must reproduce the above
16 * copy of which is available from the Open Source Initiative, see 20 * copyright notice, this list of conditions and the following
17 * http://www.opensource.org/licenses/gpl-license.php. 21 * disclaimer in the documentation and/or other materials
18 * 22 * provided with the distribution.
19 * Licensee has the right to choose one of the above licenses.
20 *
21 * Redistributions of source code must retain the above copyright
22 * notice and one of the license notices.
23 *
24 * Redistributions in binary form must reproduce both the above copyright
25 * notice, one of the license notices in the documentation
26 * and/or other materials provided with the distribution.
27 * 23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
28 */ 32 */
29 33
30#if !defined(IB_ADDR_H) 34#if !defined(IB_ADDR_H)
@@ -57,6 +61,7 @@ struct rdma_dev_addr {
57 unsigned char dst_dev_addr[MAX_ADDR_LEN]; 61 unsigned char dst_dev_addr[MAX_ADDR_LEN];
58 unsigned char broadcast[MAX_ADDR_LEN]; 62 unsigned char broadcast[MAX_ADDR_LEN];
59 enum rdma_node_type dev_type; 63 enum rdma_node_type dev_type;
64 struct net_device *src_dev;
60}; 65};
61 66
62/** 67/**
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index f179d233ffc3..00a2b8ec327f 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $
35 */ 33 */
36 34
37#ifndef _IB_CACHE_H 35#ifndef _IB_CACHE_H
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index a627c8682d2f..ec7c6d99ed3f 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -31,8 +31,6 @@
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE. 33 * SOFTWARE.
34 *
35 * $Id: ib_cm.h 4311 2005-12-05 18:42:01Z sean.hefty $
36 */ 34 */
37#if !defined(IB_CM_H) 35#if !defined(IB_CM_H)
38#define IB_CM_H 36#define IB_CM_H
diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h
index 00dadbf94e1d..f62b842e6596 100644
--- a/include/rdma/ib_fmr_pool.h
+++ b/include/rdma/ib_fmr_pool.h
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: ib_fmr_pool.h 2730 2005-06-28 16:43:03Z sean.hefty $
34 */ 32 */
35 33
36#if !defined(IB_FMR_POOL_H) 34#if !defined(IB_FMR_POOL_H)
@@ -61,7 +59,7 @@ struct ib_fmr_pool_param {
61 int pool_size; 59 int pool_size;
62 int dirty_watermark; 60 int dirty_watermark;
63 void (*flush_function)(struct ib_fmr_pool *pool, 61 void (*flush_function)(struct ib_fmr_pool *pool,
64 void * arg); 62 void *arg);
65 void *flush_arg; 63 void *flush_arg;
66 unsigned cache:1; 64 unsigned cache:1;
67}; 65};
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 7228c056b9e9..5f6c40fffcf4 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -32,11 +32,9 @@
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 *
36 * $Id: ib_mad.h 5596 2006-03-03 01:00:07Z sean.hefty $
37 */ 35 */
38 36
39#if !defined( IB_MAD_H ) 37#if !defined(IB_MAD_H)
40#define IB_MAD_H 38#define IB_MAD_H
41 39
42#include <linux/list.h> 40#include <linux/list.h>
@@ -194,8 +192,7 @@ struct ib_vendor_mad {
194 u8 data[IB_MGMT_VENDOR_DATA]; 192 u8 data[IB_MGMT_VENDOR_DATA];
195}; 193};
196 194
197struct ib_class_port_info 195struct ib_class_port_info {
198{
199 u8 base_version; 196 u8 base_version;
200 u8 class_version; 197 u8 class_version;
201 __be16 capability_mask; 198 __be16 capability_mask;
@@ -614,11 +611,11 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
614 * any class specific header, and MAD data area. 611 * any class specific header, and MAD data area.
615 * If @rmpp_active is set, the RMPP header will be initialized for sending. 612 * If @rmpp_active is set, the RMPP header will be initialized for sending.
616 */ 613 */
617struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, 614struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
618 u32 remote_qpn, u16 pkey_index, 615 u32 remote_qpn, u16 pkey_index,
619 int rmpp_active, 616 int rmpp_active,
620 int hdr_len, int data_len, 617 int hdr_len, int data_len,
621 gfp_t gfp_mask); 618 gfp_t gfp_mask);
622 619
623/** 620/**
624 * ib_is_mad_class_rmpp - returns whether given management class 621 * ib_is_mad_class_rmpp - returns whether given management class
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index f926020d6331..d7fc45c4eba9 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -28,8 +28,6 @@
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 *
32 * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $
33 */ 31 */
34 32
35#ifndef IB_PACK_H 33#ifndef IB_PACK_H
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 942692b0b92e..3841c1aff692 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -30,8 +30,6 @@
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 *
34 * $Id: ib_sa.h 2811 2005-07-06 18:11:43Z halr $
35 */ 33 */
36 34
37#ifndef IB_SA_H 35#ifndef IB_SA_H
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
index f29af135ba83..aaca0878668f 100644
--- a/include/rdma/ib_smi.h
+++ b/include/rdma/ib_smi.h
@@ -32,11 +32,9 @@
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE. 34 * SOFTWARE.
35 *
36 * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $
37 */ 35 */
38 36
39#if !defined( IB_SMI_H ) 37#if !defined(IB_SMI_H)
40#define IB_SMI_H 38#define IB_SMI_H
41 39
42#include <rdma/ib_mad.h> 40#include <rdma/ib_mad.h>
diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h
index 37650afb982c..bd3d380781e0 100644
--- a/include/rdma/ib_user_cm.h
+++ b/include/rdma/ib_user_cm.h
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: ib_user_cm.h 4019 2005-11-11 00:33:09Z sean.hefty $
34 */ 32 */
35 33
36#ifndef IB_USER_CM_H 34#ifndef IB_USER_CM_H
diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h
index 29d2c7205a90..d6fce1cbdb90 100644
--- a/include/rdma/ib_user_mad.h
+++ b/include/rdma/ib_user_mad.h
@@ -29,8 +29,6 @@
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 *
33 * $Id: ib_user_mad.h 2814 2005-07-06 19:14:09Z halr $
34 */ 32 */
35 33
36#ifndef IB_USER_MAD_H 34#ifndef IB_USER_MAD_H
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index 8d65bf0a625b..a17f77106149 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -31,8 +31,6 @@
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE. 33 * SOFTWARE.
34 *
35 * $Id: ib_user_verbs.h 4019 2005-11-11 00:33:09Z sean.hefty $
36 */ 34 */
37 35
38#ifndef IB_USER_VERBS_H 36#ifndef IB_USER_VERBS_H
@@ -291,7 +289,10 @@ struct ib_uverbs_wc {
291 __u32 opcode; 289 __u32 opcode;
292 __u32 vendor_err; 290 __u32 vendor_err;
293 __u32 byte_len; 291 __u32 byte_len;
294 __u32 imm_data; 292 union {
293 __u32 imm_data;
294 __u32 invalidate_rkey;
295 } ex;
295 __u32 qp_num; 296 __u32 qp_num;
296 __u32 src_qp; 297 __u32 src_qp;
297 __u32 wc_flags; 298 __u32 wc_flags;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 31d30b1852e8..90b529f7a154 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -34,8 +34,6 @@
34 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 * SOFTWARE. 36 * SOFTWARE.
37 *
38 * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $
39 */ 37 */
40 38
41#if !defined(IB_VERBS_H) 39#if !defined(IB_VERBS_H)
@@ -93,7 +91,7 @@ enum ib_device_cap_flags {
93 IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), 91 IB_DEVICE_RC_RNR_NAK_GEN = (1<<12),
94 IB_DEVICE_SRQ_RESIZE = (1<<13), 92 IB_DEVICE_SRQ_RESIZE = (1<<13),
95 IB_DEVICE_N_NOTIFY_CQ = (1<<14), 93 IB_DEVICE_N_NOTIFY_CQ = (1<<14),
96 IB_DEVICE_ZERO_STAG = (1<<15), 94 IB_DEVICE_LOCAL_DMA_LKEY = (1<<15),
97 IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */ 95 IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */
98 IB_DEVICE_MEM_WINDOW = (1<<17), 96 IB_DEVICE_MEM_WINDOW = (1<<17),
99 /* 97 /*
@@ -105,6 +103,8 @@ enum ib_device_cap_flags {
105 */ 103 */
106 IB_DEVICE_UD_IP_CSUM = (1<<18), 104 IB_DEVICE_UD_IP_CSUM = (1<<18),
107 IB_DEVICE_UD_TSO = (1<<19), 105 IB_DEVICE_UD_TSO = (1<<19),
106 IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
107 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
108}; 108};
109 109
110enum ib_atomic_cap { 110enum ib_atomic_cap {
@@ -150,6 +150,7 @@ struct ib_device_attr {
150 int max_srq; 150 int max_srq;
151 int max_srq_wr; 151 int max_srq_wr;
152 int max_srq_sge; 152 int max_srq_sge;
153 unsigned int max_fast_reg_page_list_len;
153 u16 max_pkeys; 154 u16 max_pkeys;
154 u8 local_ca_ack_delay; 155 u8 local_ca_ack_delay;
155}; 156};
@@ -226,6 +227,57 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
226 } 227 }
227} 228}
228 229
230struct ib_protocol_stats {
231 /* TBD... */
232};
233
234struct iw_protocol_stats {
235 u64 ipInReceives;
236 u64 ipInHdrErrors;
237 u64 ipInTooBigErrors;
238 u64 ipInNoRoutes;
239 u64 ipInAddrErrors;
240 u64 ipInUnknownProtos;
241 u64 ipInTruncatedPkts;
242 u64 ipInDiscards;
243 u64 ipInDelivers;
244 u64 ipOutForwDatagrams;
245 u64 ipOutRequests;
246 u64 ipOutDiscards;
247 u64 ipOutNoRoutes;
248 u64 ipReasmTimeout;
249 u64 ipReasmReqds;
250 u64 ipReasmOKs;
251 u64 ipReasmFails;
252 u64 ipFragOKs;
253 u64 ipFragFails;
254 u64 ipFragCreates;
255 u64 ipInMcastPkts;
256 u64 ipOutMcastPkts;
257 u64 ipInBcastPkts;
258 u64 ipOutBcastPkts;
259
260 u64 tcpRtoAlgorithm;
261 u64 tcpRtoMin;
262 u64 tcpRtoMax;
263 u64 tcpMaxConn;
264 u64 tcpActiveOpens;
265 u64 tcpPassiveOpens;
266 u64 tcpAttemptFails;
267 u64 tcpEstabResets;
268 u64 tcpCurrEstab;
269 u64 tcpInSegs;
270 u64 tcpOutSegs;
271 u64 tcpRetransSegs;
272 u64 tcpInErrs;
273 u64 tcpOutRsts;
274};
275
276union rdma_protocol_stats {
277 struct ib_protocol_stats ib;
278 struct iw_protocol_stats iw;
279};
280
229struct ib_port_attr { 281struct ib_port_attr {
230 enum ib_port_state state; 282 enum ib_port_state state;
231 enum ib_mtu max_mtu; 283 enum ib_mtu max_mtu;
@@ -413,6 +465,8 @@ enum ib_wc_opcode {
413 IB_WC_FETCH_ADD, 465 IB_WC_FETCH_ADD,
414 IB_WC_BIND_MW, 466 IB_WC_BIND_MW,
415 IB_WC_LSO, 467 IB_WC_LSO,
468 IB_WC_LOCAL_INV,
469 IB_WC_FAST_REG_MR,
416/* 470/*
417 * Set value of IB_WC_RECV so consumers can test if a completion is a 471 * Set value of IB_WC_RECV so consumers can test if a completion is a
418 * receive by testing (opcode & IB_WC_RECV). 472 * receive by testing (opcode & IB_WC_RECV).
@@ -423,7 +477,8 @@ enum ib_wc_opcode {
423 477
424enum ib_wc_flags { 478enum ib_wc_flags {
425 IB_WC_GRH = 1, 479 IB_WC_GRH = 1,
426 IB_WC_WITH_IMM = (1<<1) 480 IB_WC_WITH_IMM = (1<<1),
481 IB_WC_WITH_INVALIDATE = (1<<2),
427}; 482};
428 483
429struct ib_wc { 484struct ib_wc {
@@ -433,7 +488,10 @@ struct ib_wc {
433 u32 vendor_err; 488 u32 vendor_err;
434 u32 byte_len; 489 u32 byte_len;
435 struct ib_qp *qp; 490 struct ib_qp *qp;
436 __be32 imm_data; 491 union {
492 __be32 imm_data;
493 u32 invalidate_rkey;
494 } ex;
437 u32 src_qp; 495 u32 src_qp;
438 int wc_flags; 496 int wc_flags;
439 u16 pkey_index; 497 u16 pkey_index;
@@ -498,7 +556,8 @@ enum ib_qp_type {
498}; 556};
499 557
500enum ib_qp_create_flags { 558enum ib_qp_create_flags {
501 IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, 559 IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
560 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
502}; 561};
503 562
504struct ib_qp_init_attr { 563struct ib_qp_init_attr {
@@ -627,6 +686,9 @@ enum ib_wr_opcode {
627 IB_WR_ATOMIC_FETCH_AND_ADD, 686 IB_WR_ATOMIC_FETCH_AND_ADD,
628 IB_WR_LSO, 687 IB_WR_LSO,
629 IB_WR_SEND_WITH_INV, 688 IB_WR_SEND_WITH_INV,
689 IB_WR_RDMA_READ_WITH_INV,
690 IB_WR_LOCAL_INV,
691 IB_WR_FAST_REG_MR,
630}; 692};
631 693
632enum ib_send_flags { 694enum ib_send_flags {
@@ -643,6 +705,12 @@ struct ib_sge {
643 u32 lkey; 705 u32 lkey;
644}; 706};
645 707
708struct ib_fast_reg_page_list {
709 struct ib_device *device;
710 u64 *page_list;
711 unsigned int max_page_list_len;
712};
713
646struct ib_send_wr { 714struct ib_send_wr {
647 struct ib_send_wr *next; 715 struct ib_send_wr *next;
648 u64 wr_id; 716 u64 wr_id;
@@ -675,6 +743,15 @@ struct ib_send_wr {
675 u16 pkey_index; /* valid for GSI only */ 743 u16 pkey_index; /* valid for GSI only */
676 u8 port_num; /* valid for DR SMPs on switch only */ 744 u8 port_num; /* valid for DR SMPs on switch only */
677 } ud; 745 } ud;
746 struct {
747 u64 iova_start;
748 struct ib_fast_reg_page_list *page_list;
749 unsigned int page_shift;
750 unsigned int page_list_len;
751 u32 length;
752 int access_flags;
753 u32 rkey;
754 } fast_reg;
678 } wr; 755 } wr;
679}; 756};
680 757
@@ -777,7 +854,7 @@ struct ib_cq {
777 struct ib_uobject *uobject; 854 struct ib_uobject *uobject;
778 ib_comp_handler comp_handler; 855 ib_comp_handler comp_handler;
779 void (*event_handler)(struct ib_event *, void *); 856 void (*event_handler)(struct ib_event *, void *);
780 void * cq_context; 857 void *cq_context;
781 int cqe; 858 int cqe;
782 atomic_t usecnt; /* count number of work queues */ 859 atomic_t usecnt; /* count number of work queues */
783}; 860};
@@ -883,7 +960,7 @@ struct ib_dma_mapping_ops {
883 void (*sync_single_for_cpu)(struct ib_device *dev, 960 void (*sync_single_for_cpu)(struct ib_device *dev,
884 u64 dma_handle, 961 u64 dma_handle,
885 size_t size, 962 size_t size,
886 enum dma_data_direction dir); 963 enum dma_data_direction dir);
887 void (*sync_single_for_device)(struct ib_device *dev, 964 void (*sync_single_for_device)(struct ib_device *dev,
888 u64 dma_handle, 965 u64 dma_handle,
889 size_t size, 966 size_t size,
@@ -919,6 +996,8 @@ struct ib_device {
919 996
920 struct iw_cm_verbs *iwcm; 997 struct iw_cm_verbs *iwcm;
921 998
999 int (*get_protocol_stats)(struct ib_device *device,
1000 union rdma_protocol_stats *stats);
922 int (*query_device)(struct ib_device *device, 1001 int (*query_device)(struct ib_device *device,
923 struct ib_device_attr *device_attr); 1002 struct ib_device_attr *device_attr);
924 int (*query_port)(struct ib_device *device, 1003 int (*query_port)(struct ib_device *device,
@@ -1013,6 +1092,11 @@ struct ib_device {
1013 int (*query_mr)(struct ib_mr *mr, 1092 int (*query_mr)(struct ib_mr *mr,
1014 struct ib_mr_attr *mr_attr); 1093 struct ib_mr_attr *mr_attr);
1015 int (*dereg_mr)(struct ib_mr *mr); 1094 int (*dereg_mr)(struct ib_mr *mr);
1095 struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
1096 int max_page_list_len);
1097 struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
1098 int page_list_len);
1099 void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
1016 int (*rereg_phys_mr)(struct ib_mr *mr, 1100 int (*rereg_phys_mr)(struct ib_mr *mr,
1017 int mr_rereg_mask, 1101 int mr_rereg_mask,
1018 struct ib_pd *pd, 1102 struct ib_pd *pd,
@@ -1065,6 +1149,7 @@ struct ib_device {
1065 1149
1066 char node_desc[64]; 1150 char node_desc[64];
1067 __be64 node_guid; 1151 __be64 node_guid;
1152 u32 local_dma_lkey;
1068 u8 node_type; 1153 u8 node_type;
1069 u8 phys_port_cnt; 1154 u8 phys_port_cnt;
1070}; 1155};
@@ -1807,6 +1892,54 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
1807int ib_dereg_mr(struct ib_mr *mr); 1892int ib_dereg_mr(struct ib_mr *mr);
1808 1893
1809/** 1894/**
1895 * ib_alloc_fast_reg_mr - Allocates memory region usable with the
1896 * IB_WR_FAST_REG_MR send work request.
1897 * @pd: The protection domain associated with the region.
1898 * @max_page_list_len: requested max physical buffer list length to be
1899 * used with fast register work requests for this MR.
1900 */
1901struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
1902
1903/**
1904 * ib_alloc_fast_reg_page_list - Allocates a page list array
1905 * @device - ib device pointer.
1906 * @page_list_len - size of the page list array to be allocated.
1907 *
1908 * This allocates and returns a struct ib_fast_reg_page_list * and a
1909 * page_list array that is at least page_list_len in size. The actual
1910 * size is returned in max_page_list_len. The caller is responsible
1911 * for initializing the contents of the page_list array before posting
1912 * a send work request with the IB_WC_FAST_REG_MR opcode.
1913 *
1914 * The page_list array entries must be translated using one of the
1915 * ib_dma_*() functions just like the addresses passed to
1916 * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct
1917 * ib_fast_reg_page_list must not be modified by the caller until the
1918 * IB_WC_FAST_REG_MR work request completes.
1919 */
1920struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
1921 struct ib_device *device, int page_list_len);
1922
1923/**
1924 * ib_free_fast_reg_page_list - Deallocates a previously allocated
1925 * page list array.
1926 * @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
1927 */
1928void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
1929
1930/**
1931 * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
1932 * R_Key and L_Key.
1933 * @mr - struct ib_mr pointer to be updated.
1934 * @newkey - new key to be used.
1935 */
1936static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
1937{
1938 mr->lkey = (mr->lkey & 0xffffff00) | newkey;
1939 mr->rkey = (mr->rkey & 0xffffff00) | newkey;
1940}
1941
1942/**
1810 * ib_alloc_mw - Allocates a memory window. 1943 * ib_alloc_mw - Allocates a memory window.
1811 * @pd: The protection domain associated with the memory window. 1944 * @pd: The protection domain associated with the memory window.
1812 */ 1945 */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index aeefa9b740dc..cbb822e8d791 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -62,7 +62,7 @@ struct iw_cm_event {
62 struct sockaddr_in remote_addr; 62 struct sockaddr_in remote_addr;
63 void *private_data; 63 void *private_data;
64 u8 private_data_len; 64 u8 private_data_len;
65 void* provider_data; 65 void *provider_data;
66}; 66};
67 67
68/** 68/**
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 010f876f41d8..22bb2e7bab1a 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -2,29 +2,33 @@
2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * 4 *
5 * This Software is licensed under one of the following licenses: 5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
6 * 10 *
7 * 1) under the terms of the "Common Public License 1.0" a copy of which is 11 * Redistribution and use in source and binary forms, with or
8 * available from the Open Source Initiative, see 12 * without modification, are permitted provided that the following
9 * http://www.opensource.org/licenses/cpl.php. 13 * conditions are met:
10 * 14 *
11 * 2) under the terms of the "The BSD License" a copy of which is 15 * - Redistributions of source code must retain the above
12 * available from the Open Source Initiative, see 16 * copyright notice, this list of conditions and the following
13 * http://www.opensource.org/licenses/bsd-license.php. 17 * disclaimer.
14 * 18 *
15 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a 19 * - Redistributions in binary form must reproduce the above
16 * copy of which is available from the Open Source Initiative, see 20 * copyright notice, this list of conditions and the following
17 * http://www.opensource.org/licenses/gpl-license.php. 21 * disclaimer in the documentation and/or other materials
18 * 22 * provided with the distribution.
19 * Licensee has the right to choose one of the above licenses.
20 *
21 * Redistributions of source code must retain the above copyright
22 * notice and one of the license notices.
23 *
24 * Redistributions in binary form must reproduce both the above copyright
25 * notice, one of the license notices in the documentation
26 * and/or other materials provided with the distribution.
27 * 23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
28 */ 32 */
29 33
30#if !defined(RDMA_CM_H) 34#if !defined(RDMA_CM_H)
@@ -57,11 +61,11 @@ enum rdma_cm_event_type {
57}; 61};
58 62
59enum rdma_port_space { 63enum rdma_port_space {
60 RDMA_PS_SDP = 0x0001, 64 RDMA_PS_SDP = 0x0001,
61 RDMA_PS_IPOIB= 0x0002, 65 RDMA_PS_IPOIB = 0x0002,
62 RDMA_PS_TCP = 0x0106, 66 RDMA_PS_TCP = 0x0106,
63 RDMA_PS_UDP = 0x0111, 67 RDMA_PS_UDP = 0x0111,
64 RDMA_PS_SCTP = 0x0183 68 RDMA_PS_SCTP = 0x0183
65}; 69};
66 70
67struct rdma_addr { 71struct rdma_addr {
diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h
index 950424b38f16..2389c3b45404 100644
--- a/include/rdma/rdma_cm_ib.h
+++ b/include/rdma/rdma_cm_ib.h
@@ -1,29 +1,33 @@
1/* 1/*
2 * Copyright (c) 2006 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 Intel Corporation. All rights reserved.
3 * 3 *
4 * This Software is licensed under one of the following licenses: 4 * This software is available to you under a choice of one of two
5 * 5 * licenses. You may choose to be licensed under the terms of the GNU
6 * 1) under the terms of the "Common Public License 1.0" a copy of which is 6 * General Public License (GPL) Version 2, available from the file
7 * available from the Open Source Initiative, see 7 * COPYING in the main directory of this source tree, or the
8 * http://www.opensource.org/licenses/cpl.php. 8 * OpenIB.org BSD license below:
9 * 9 *
10 * 2) under the terms of the "The BSD License" a copy of which is 10 * Redistribution and use in source and binary forms, with or
11 * available from the Open Source Initiative, see 11 * without modification, are permitted provided that the following
12 * http://www.opensource.org/licenses/bsd-license.php. 12 * conditions are met:
13 * 13 *
14 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a 14 * - Redistributions of source code must retain the above
15 * copy of which is available from the Open Source Initiative, see 15 * copyright notice, this list of conditions and the following
16 * http://www.opensource.org/licenses/gpl-license.php. 16 * disclaimer.
17 * 17 *
18 * Licensee has the right to choose one of the above licenses. 18 * - Redistributions in binary form must reproduce the above
19 * 19 * copyright notice, this list of conditions and the following
20 * Redistributions of source code must retain the above copyright 20 * disclaimer in the documentation and/or other materials
21 * notice and one of the license notices. 21 * provided with the distribution.
22 * 22 *
23 * Redistributions in binary form must reproduce both the above copyright 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * notice, one of the license notices in the documentation 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * and/or other materials provided with the distribution. 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
27 */ 31 */
28 32
29#if !defined(RDMA_CM_IB_H) 33#if !defined(RDMA_CM_IB_H)
diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c
index d1a7605c5b8f..a5e026bc45c4 100644
--- a/kernel/backtracetest.c
+++ b/kernel/backtracetest.c
@@ -10,30 +10,73 @@
10 * of the License. 10 * of the License.
11 */ 11 */
12 12
13#include <linux/completion.h>
14#include <linux/delay.h>
15#include <linux/interrupt.h>
13#include <linux/module.h> 16#include <linux/module.h>
14#include <linux/sched.h> 17#include <linux/sched.h>
15#include <linux/delay.h> 18#include <linux/stacktrace.h>
19
20static void backtrace_test_normal(void)
21{
22 printk("Testing a backtrace from process context.\n");
23 printk("The following trace is a kernel self test and not a bug!\n");
16 24
17static struct timer_list backtrace_timer; 25 dump_stack();
26}
18 27
19static void backtrace_test_timer(unsigned long data) 28static DECLARE_COMPLETION(backtrace_work);
29
30static void backtrace_test_irq_callback(unsigned long data)
31{
32 dump_stack();
33 complete(&backtrace_work);
34}
35
36static DECLARE_TASKLET(backtrace_tasklet, &backtrace_test_irq_callback, 0);
37
38static void backtrace_test_irq(void)
20{ 39{
21 printk("Testing a backtrace from irq context.\n"); 40 printk("Testing a backtrace from irq context.\n");
22 printk("The following trace is a kernel self test and not a bug!\n"); 41 printk("The following trace is a kernel self test and not a bug!\n");
23 dump_stack(); 42
43 init_completion(&backtrace_work);
44 tasklet_schedule(&backtrace_tasklet);
45 wait_for_completion(&backtrace_work);
46}
47
48#ifdef CONFIG_STACKTRACE
49static void backtrace_test_saved(void)
50{
51 struct stack_trace trace;
52 unsigned long entries[8];
53
54 printk("Testing a saved backtrace.\n");
55 printk("The following trace is a kernel self test and not a bug!\n");
56
57 trace.nr_entries = 0;
58 trace.max_entries = ARRAY_SIZE(entries);
59 trace.entries = entries;
60 trace.skip = 0;
61
62 save_stack_trace(&trace);
63 print_stack_trace(&trace, 0);
64}
65#else
66static void backtrace_test_saved(void)
67{
68 printk("Saved backtrace test skipped.\n");
24} 69}
70#endif
71
25static int backtrace_regression_test(void) 72static int backtrace_regression_test(void)
26{ 73{
27 printk("====[ backtrace testing ]===========\n"); 74 printk("====[ backtrace testing ]===========\n");
28 printk("Testing a backtrace from process context.\n");
29 printk("The following trace is a kernel self test and not a bug!\n");
30 dump_stack();
31 75
32 init_timer(&backtrace_timer); 76 backtrace_test_normal();
33 backtrace_timer.function = backtrace_test_timer; 77 backtrace_test_irq();
34 mod_timer(&backtrace_timer, jiffies + 10); 78 backtrace_test_saved();
35 79
36 msleep(10);
37 printk("====[ end of backtrace testing ]====\n"); 80 printk("====[ end of backtrace testing ]====\n");
38 return 0; 81 return 0;
39} 82}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 27a83ee41443..2913a8bff612 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -300,11 +300,10 @@ EXPORT_SYMBOL_GPL(ktime_sub_ns);
300 */ 300 */
301u64 ktime_divns(const ktime_t kt, s64 div) 301u64 ktime_divns(const ktime_t kt, s64 div)
302{ 302{
303 u64 dclc, inc, dns; 303 u64 dclc;
304 int sft = 0; 304 int sft = 0;
305 305
306 dclc = dns = ktime_to_ns(kt); 306 dclc = ktime_to_ns(kt);
307 inc = div;
308 /* Make sure the divisor is less than 2^32: */ 307 /* Make sure the divisor is less than 2^32: */
309 while (div >> 32) { 308 while (div >> 32) {
310 sft++; 309 sft++;
@@ -632,8 +631,6 @@ void clock_was_set(void)
632 */ 631 */
633void hres_timers_resume(void) 632void hres_timers_resume(void)
634{ 633{
635 WARN_ON_ONCE(num_online_cpus() > 1);
636
637 /* Retrigger the CPU local events: */ 634 /* Retrigger the CPU local events: */
638 retrigger_next_event(NULL); 635 retrigger_next_event(NULL);
639} 636}
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 46d6611a33bb..77a51be36010 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -17,6 +17,8 @@
17 17
18#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
19 19
20cpumask_t irq_default_affinity = CPU_MASK_ALL;
21
20/** 22/**
21 * synchronize_irq - wait for pending IRQ handlers (on other CPUs) 23 * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
22 * @irq: interrupt number to wait for 24 * @irq: interrupt number to wait for
@@ -95,6 +97,27 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
95 return 0; 97 return 0;
96} 98}
97 99
100#ifndef CONFIG_AUTO_IRQ_AFFINITY
101/*
102 * Generic version of the affinity autoselector.
103 */
104int irq_select_affinity(unsigned int irq)
105{
106 cpumask_t mask;
107
108 if (!irq_can_set_affinity(irq))
109 return 0;
110
111 cpus_and(mask, cpu_online_map, irq_default_affinity);
112
113 irq_desc[irq].affinity = mask;
114 irq_desc[irq].chip->set_affinity(irq, mask);
115
116 set_balance_irq_affinity(irq, mask);
117 return 0;
118}
119#endif
120
98#endif 121#endif
99 122
100/** 123/**
@@ -354,7 +377,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
354 377
355 /* Setup the type (level, edge polarity) if configured: */ 378 /* Setup the type (level, edge polarity) if configured: */
356 if (new->flags & IRQF_TRIGGER_MASK) { 379 if (new->flags & IRQF_TRIGGER_MASK) {
357 if (desc->chip && desc->chip->set_type) 380 if (desc->chip->set_type)
358 desc->chip->set_type(irq, 381 desc->chip->set_type(irq,
359 new->flags & IRQF_TRIGGER_MASK); 382 new->flags & IRQF_TRIGGER_MASK);
360 else 383 else
@@ -364,8 +387,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
364 */ 387 */
365 printk(KERN_WARNING "No IRQF_TRIGGER set_type " 388 printk(KERN_WARNING "No IRQF_TRIGGER set_type "
366 "function for IRQ %d (%s)\n", irq, 389 "function for IRQ %d (%s)\n", irq,
367 desc->chip ? desc->chip->name : 390 desc->chip->name);
368 "unknown");
369 } else 391 } else
370 compat_irq_chip_set_default_handler(desc); 392 compat_irq_chip_set_default_handler(desc);
371 393
@@ -382,6 +404,9 @@ int setup_irq(unsigned int irq, struct irqaction *new)
382 } else 404 } else
383 /* Undo nested disables: */ 405 /* Undo nested disables: */
384 desc->depth = 1; 406 desc->depth = 1;
407
408 /* Set default affinity mask once everything is setup */
409 irq_select_affinity(irq);
385 } 410 }
386 /* Reset broken irq detection when installing new handler */ 411 /* Reset broken irq detection when installing new handler */
387 desc->irq_count = 0; 412 desc->irq_count = 0;
@@ -571,8 +596,6 @@ int request_irq(unsigned int irq, irq_handler_t handler,
571 action->next = NULL; 596 action->next = NULL;
572 action->dev_id = dev_id; 597 action->dev_id = dev_id;
573 598
574 select_smp_affinity(irq);
575
576#ifdef CONFIG_DEBUG_SHIRQ 599#ifdef CONFIG_DEBUG_SHIRQ
577 if (irqflags & IRQF_SHARED) { 600 if (irqflags & IRQF_SHARED) {
578 /* 601 /*
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index c2f2ccb0549a..6c6d35d68ee9 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -44,7 +44,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
44 unsigned long count, void *data) 44 unsigned long count, void *data)
45{ 45{
46 unsigned int irq = (int)(long)data, full_count = count, err; 46 unsigned int irq = (int)(long)data, full_count = count, err;
47 cpumask_t new_value, tmp; 47 cpumask_t new_value;
48 48
49 if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || 49 if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
50 irq_balancing_disabled(irq)) 50 irq_balancing_disabled(irq))
@@ -62,17 +62,51 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
62 * way to make the system unusable accidentally :-) At least 62 * way to make the system unusable accidentally :-) At least
63 * one online CPU still has to be targeted. 63 * one online CPU still has to be targeted.
64 */ 64 */
65 cpus_and(tmp, new_value, cpu_online_map); 65 if (!cpus_intersects(new_value, cpu_online_map))
66 if (cpus_empty(tmp))
67 /* Special case for empty set - allow the architecture 66 /* Special case for empty set - allow the architecture
68 code to set default SMP affinity. */ 67 code to set default SMP affinity. */
69 return select_smp_affinity(irq) ? -EINVAL : full_count; 68 return irq_select_affinity(irq) ? -EINVAL : full_count;
70 69
71 irq_set_affinity(irq, new_value); 70 irq_set_affinity(irq, new_value);
72 71
73 return full_count; 72 return full_count;
74} 73}
75 74
75static int default_affinity_read(char *page, char **start, off_t off,
76 int count, int *eof, void *data)
77{
78 int len = cpumask_scnprintf(page, count, irq_default_affinity);
79 if (count - len < 2)
80 return -EINVAL;
81 len += sprintf(page + len, "\n");
82 return len;
83}
84
85static int default_affinity_write(struct file *file, const char __user *buffer,
86 unsigned long count, void *data)
87{
88 unsigned int full_count = count, err;
89 cpumask_t new_value;
90
91 err = cpumask_parse_user(buffer, count, new_value);
92 if (err)
93 return err;
94
95 if (!is_affinity_mask_valid(new_value))
96 return -EINVAL;
97
98 /*
99 * Do not allow disabling IRQs completely - it's a too easy
100 * way to make the system unusable accidentally :-) At least
101 * one online CPU still has to be targeted.
102 */
103 if (!cpus_intersects(new_value, cpu_online_map))
104 return -EINVAL;
105
106 irq_default_affinity = new_value;
107
108 return full_count;
109}
76#endif 110#endif
77 111
78static int irq_spurious_read(char *page, char **start, off_t off, 112static int irq_spurious_read(char *page, char **start, off_t off,
@@ -171,6 +205,21 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
171 remove_proc_entry(action->dir->name, irq_desc[irq].dir); 205 remove_proc_entry(action->dir->name, irq_desc[irq].dir);
172} 206}
173 207
208void register_default_affinity_proc(void)
209{
210#ifdef CONFIG_SMP
211 struct proc_dir_entry *entry;
212
213 /* create /proc/irq/default_smp_affinity */
214 entry = create_proc_entry("default_smp_affinity", 0600, root_irq_dir);
215 if (entry) {
216 entry->data = NULL;
217 entry->read_proc = default_affinity_read;
218 entry->write_proc = default_affinity_write;
219 }
220#endif
221}
222
174void init_irq_proc(void) 223void init_irq_proc(void)
175{ 224{
176 int i; 225 int i;
@@ -180,6 +229,8 @@ void init_irq_proc(void)
180 if (!root_irq_dir) 229 if (!root_irq_dir)
181 return; 230 return;
182 231
232 register_default_affinity_proc();
233
183 /* 234 /*
184 * Create entries for all existing IRQs. 235 * Create entries for all existing IRQs.
185 */ 236 */
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index f1525ad06cb3..c42a03aef36f 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1037,6 +1037,9 @@ static void check_thread_timers(struct task_struct *tsk,
1037 sig->rlim[RLIMIT_RTTIME].rlim_cur += 1037 sig->rlim[RLIMIT_RTTIME].rlim_cur +=
1038 USEC_PER_SEC; 1038 USEC_PER_SEC;
1039 } 1039 }
1040 printk(KERN_INFO
1041 "RT Watchdog Timeout: %s[%d]\n",
1042 tsk->comm, task_pid_nr(tsk));
1040 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); 1043 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1041 } 1044 }
1042 } 1045 }
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index b71816e47a30..94b527ef1d1e 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -6,19 +6,21 @@
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */ 7 */
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/module.h>
9#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
10#include <linux/stacktrace.h> 11#include <linux/stacktrace.h>
11 12
12void print_stack_trace(struct stack_trace *trace, int spaces) 13void print_stack_trace(struct stack_trace *trace, int spaces)
13{ 14{
14 int i, j; 15 int i;
15 16
16 for (i = 0; i < trace->nr_entries; i++) { 17 if (WARN_ON(!trace->entries))
17 unsigned long ip = trace->entries[i]; 18 return;
18 19
19 for (j = 0; j < spaces + 1; j++) 20 for (i = 0; i < trace->nr_entries; i++) {
20 printk(" "); 21 printk("%*c", 1 + spaces, ' ');
21 print_ip_sym(ip); 22 print_ip_sym(trace->entries[i]);
22 } 23 }
23} 24}
25EXPORT_SYMBOL_GPL(print_stack_trace);
24 26
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d63008b09a4c..beef7ccdf842 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -48,6 +48,13 @@ static void tick_do_update_jiffies64(ktime_t now)
48 unsigned long ticks = 0; 48 unsigned long ticks = 0;
49 ktime_t delta; 49 ktime_t delta;
50 50
51 /*
52 * Do a quick check without holding xtime_lock:
53 */
54 delta = ktime_sub(now, last_jiffies_update);
55 if (delta.tv64 < tick_period.tv64)
56 return;
57
51 /* Reevalute with xtime_lock held */ 58 /* Reevalute with xtime_lock held */
52 write_seqlock(&xtime_lock); 59 write_seqlock(&xtime_lock);
53 60
@@ -228,6 +235,7 @@ void tick_nohz_stop_sched_tick(void)
228 local_softirq_pending()); 235 local_softirq_pending());
229 ratelimit++; 236 ratelimit++;
230 } 237 }
238 goto end;
231 } 239 }
232 240
233 ts->idle_calls++; 241 ts->idle_calls++;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d8b6279a9b42..c459e8547bd8 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -419,7 +419,6 @@ config DEBUG_LOCKING_API_SELFTESTS
419 419
420config STACKTRACE 420config STACKTRACE
421 bool 421 bool
422 depends on DEBUG_KERNEL
423 depends on STACKTRACE_SUPPORT 422 depends on STACKTRACE_SUPPORT
424 423
425config DEBUG_KOBJECT 424config DEBUG_KOBJECT
@@ -563,6 +562,9 @@ config BACKTRACE_SELF_TEST
563 for distributions or general kernels, but only for kernel 562 for distributions or general kernels, but only for kernel
564 developers working on architecture code. 563 developers working on architecture code.
565 564
565 Note that if you want to also test saved backtraces, you will
566 have to enable STACKTRACE as well.
567
566 Say N if you are unsure. 568 Say N if you are unsure.
567 569
568config LKDTM 570config LKDTM
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 119174494cb5..4a8ba4bf5f6f 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
52 * Add up all the per-cpu counts, return the result. This is a more accurate 52 * Add up all the per-cpu counts, return the result. This is a more accurate
53 * but much slower version of percpu_counter_read_positive() 53 * but much slower version of percpu_counter_read_positive()
54 */ 54 */
55s64 __percpu_counter_sum(struct percpu_counter *fbc) 55s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
56{ 56{
57 s64 ret; 57 s64 ret;
58 int cpu; 58 int cpu;
@@ -62,7 +62,12 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
62 for_each_online_cpu(cpu) { 62 for_each_online_cpu(cpu) {
63 s32 *pcount = per_cpu_ptr(fbc->counters, cpu); 63 s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
64 ret += *pcount; 64 ret += *pcount;
65 if (set)
66 *pcount = 0;
65 } 67 }
68 if (set)
69 fbc->count = ret;
70
66 spin_unlock(&fbc->lock); 71 spin_unlock(&fbc->lock);
67 return ret; 72 return ret;
68} 73}
diff --git a/mm/filemap.c b/mm/filemap.c
index 1e6a7d34874f..65d9d9e2b755 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -236,11 +236,12 @@ int filemap_fdatawrite(struct address_space *mapping)
236} 236}
237EXPORT_SYMBOL(filemap_fdatawrite); 237EXPORT_SYMBOL(filemap_fdatawrite);
238 238
239static int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, 239int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
240 loff_t end) 240 loff_t end)
241{ 241{
242 return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL); 242 return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
243} 243}
244EXPORT_SYMBOL(filemap_fdatawrite_range);
244 245
245/** 246/**
246 * filemap_flush - mostly a non-blocking flush 247 * filemap_flush - mostly a non-blocking flush
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b38f700825fc..94c6d8988ab3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -960,6 +960,9 @@ retry:
960 } 960 }
961 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 961 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
962 mapping->writeback_index = index; 962 mapping->writeback_index = index;
963
964 if (wbc->range_cont)
965 wbc->range_start = index << PAGE_CACHE_SHIFT;
963 return ret; 966 return ret;
964} 967}
965EXPORT_SYMBOL(write_cache_pages); 968EXPORT_SYMBOL(write_cache_pages);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 91200feb3f9c..63f131fc42e4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -555,15 +555,13 @@ static int selinux_set_mnt_opts(struct super_block *sb,
555 struct task_security_struct *tsec = current->security; 555 struct task_security_struct *tsec = current->security;
556 struct superblock_security_struct *sbsec = sb->s_security; 556 struct superblock_security_struct *sbsec = sb->s_security;
557 const char *name = sb->s_type->name; 557 const char *name = sb->s_type->name;
558 struct dentry *root = sb->s_root; 558 struct inode *inode = sbsec->sb->s_root->d_inode;
559 struct inode *root_inode = root->d_inode; 559 struct inode_security_struct *root_isec = inode->i_security;
560 struct inode_security_struct *root_isec = root_inode->i_security;
561 u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0; 560 u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
562 u32 defcontext_sid = 0; 561 u32 defcontext_sid = 0;
563 char **mount_options = opts->mnt_opts; 562 char **mount_options = opts->mnt_opts;
564 int *flags = opts->mnt_opts_flags; 563 int *flags = opts->mnt_opts_flags;
565 int num_opts = opts->num_mnt_opts; 564 int num_opts = opts->num_mnt_opts;
566 bool can_xattr = false;
567 565
568 mutex_lock(&sbsec->lock); 566 mutex_lock(&sbsec->lock);
569 567
@@ -667,24 +665,14 @@ static int selinux_set_mnt_opts(struct super_block *sb,
667 goto out; 665 goto out;
668 } 666 }
669 667
670 if (strcmp(name, "proc") == 0) 668 if (strcmp(sb->s_type->name, "proc") == 0)
671 sbsec->proc = 1; 669 sbsec->proc = 1;
672 670
673 /*
674 * test if the fs supports xattrs, fs_use might make use of this if the
675 * fs has no definition in policy.
676 */
677 if (root_inode->i_op->getxattr) {
678 rc = root_inode->i_op->getxattr(root, XATTR_NAME_SELINUX, NULL, 0);
679 if (rc >= 0 || rc == -ENODATA)
680 can_xattr = true;
681 }
682
683 /* Determine the labeling behavior to use for this filesystem type. */ 671 /* Determine the labeling behavior to use for this filesystem type. */
684 rc = security_fs_use(name, &sbsec->behavior, &sbsec->sid, can_xattr); 672 rc = security_fs_use(sb->s_type->name, &sbsec->behavior, &sbsec->sid);
685 if (rc) { 673 if (rc) {
686 printk(KERN_WARNING "%s: security_fs_use(%s) returned %d\n", 674 printk(KERN_WARNING "%s: security_fs_use(%s) returned %d\n",
687 __func__, name, rc); 675 __func__, sb->s_type->name, rc);
688 goto out; 676 goto out;
689 } 677 }
690 678
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 44cba2e21dcf..7c543003d653 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -136,7 +136,7 @@ int security_get_allow_unknown(void);
136#define SECURITY_FS_USE_MNTPOINT 6 /* use mountpoint labeling */ 136#define SECURITY_FS_USE_MNTPOINT 6 /* use mountpoint labeling */
137 137
138int security_fs_use(const char *fstype, unsigned int *behavior, 138int security_fs_use(const char *fstype, unsigned int *behavior,
139 u32 *sid, bool can_xattr); 139 u32 *sid);
140 140
141int security_genfs_sid(const char *fstype, char *name, u16 sclass, 141int security_genfs_sid(const char *fstype, char *name, u16 sclass,
142 u32 *sid); 142 u32 *sid);
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 8e42da120101..b52f923ce680 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1934,8 +1934,7 @@ out:
1934int security_fs_use( 1934int security_fs_use(
1935 const char *fstype, 1935 const char *fstype,
1936 unsigned int *behavior, 1936 unsigned int *behavior,
1937 u32 *sid, 1937 u32 *sid)
1938 bool can_xattr)
1939{ 1938{
1940 int rc = 0; 1939 int rc = 0;
1941 struct ocontext *c; 1940 struct ocontext *c;
@@ -1949,7 +1948,6 @@ int security_fs_use(
1949 c = c->next; 1948 c = c->next;
1950 } 1949 }
1951 1950
1952 /* look for labeling behavior defined in policy */
1953 if (c) { 1951 if (c) {
1954 *behavior = c->v.behavior; 1952 *behavior = c->v.behavior;
1955 if (!c->sid[0]) { 1953 if (!c->sid[0]) {
@@ -1960,23 +1958,14 @@ int security_fs_use(
1960 goto out; 1958 goto out;
1961 } 1959 }
1962 *sid = c->sid[0]; 1960 *sid = c->sid[0];
1963 goto out;
1964 }
1965
1966 /* labeling behavior not in policy, use xattrs if possible */
1967 if (can_xattr) {
1968 *behavior = SECURITY_FS_USE_XATTR;
1969 *sid = SECINITSID_FS;
1970 goto out;
1971 }
1972
1973 /* no behavior in policy and can't use xattrs, try GENFS */
1974 rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid);
1975 if (rc) {
1976 *behavior = SECURITY_FS_USE_NONE;
1977 rc = 0;
1978 } else { 1961 } else {
1979 *behavior = SECURITY_FS_USE_GENFS; 1962 rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid);
1963 if (rc) {
1964 *behavior = SECURITY_FS_USE_NONE;
1965 rc = 0;
1966 } else {
1967 *behavior = SECURITY_FS_USE_GENFS;
1968 }
1980 } 1969 }
1981 1970
1982out: 1971out: